Intro

In this script, I load exchange data from DATRAS and calculate catch of cod and flounder in units of kg/km^2 (standardized to the TVL gear), by correcting for gear dimensions, sweep length and trawl speed, following Orio et al. (2017). I also add oxygen, temperature and depth covariates, which I use for modelling this biomass index.

Load libraries

# NOTE(review): rm(list = ls()) is discouraged in scripts -- it only clears the
# global environment (not loaded packages, options or the working directory)
# and silently wipes the session of anyone who sources this file.
rm(list = ls())

library(tidyverse); theme_set(theme_light(base_size = 12))
library(readxl)
library(tidylog)
library(RCurl)
library(viridis)
library(RColorBrewer)
library(patchwork)
library(janitor)
library(icesDatras)
library(mapdata)
library(patchwork)
library(rgdal)
library(raster)
library(sf)
library(rgeos)
library(chron)
library(lattice)
library(ncdf4)
library(sdmTMB) # remotes::install_github("pbs-assess/sdmTMB")
library(marmap)
library(rnaturalearth)
library(rnaturalearthdata)
library(mapplots)
library(geosphere)
#library(gganimate)

# Global coastline polygons as sf (note: ne_countries() is called again below
# as `map_data` with continent = "europe" for the cropped Baltic map)
world <- ne_countries(scale = "medium", returnclass = "sf")

For maps

# Specify map ranges (Baltic Sea study area, WGS84 degrees)
# Use `<-` for assignment (was `=`), per R convention
ymin <- 54
ymax <- 58
xmin <- 12
xmax <- 22

map_data <- rnaturalearth::ne_countries(
  scale = "medium",
  returnclass = "sf", continent = "europe")

# Crop the polygon for plotting and efficiency:
# st_bbox(map_data) # find the rough coordinates
swe_coast <- suppressWarnings(suppressMessages(
  st_crop(map_data,
          c(xmin = xmin, ymin = ymin, xmax = xmax, ymax = ymax))))

# Transform our map into UTM zone 33N (EPSG:32633), the projected coordinate
# system the models are fitted in. (The previous comment called this
# "equal-area" -- UTM is conformal, not equal-area; distances/areas are only
# approximately preserved near the central meridian, which is fine here.)
utm_zone33 <- 32633
swe_coast_proj <- sf::st_transform(swe_coast, crs = utm_zone33)

#ggplot(swe_coast_proj) + geom_sf() 

Read data

# Read HH (haul) data
# DATRAS downloads are slow, so the getDATRAS() calls are cached as csv files:
# bits_hh <- getDATRAS(record = "HH", survey = "BITS", years = 1991:2020, quarters = 4)
# write.csv(bits_hh, "data/DATRAS_exchange/bits_hh.csv")
bits_hh <- read.csv("data/DATRAS_exchange/bits_hh.csv")

# Read HL (length-based catch) data
# bits_hl <- getDATRAS(record = "HL", survey = "BITS", years = 1991:2020, quarters = 4)
# write.csv(bits_hl, "data/DATRAS_exchange/bits_hl.csv")
bits_hl <- read.csv("data/DATRAS_exchange/bits_hl.csv")

# Read CA (age/individual-based) data
# bits_ca <- getDATRAS(record = "CA", survey = "BITS", years = 1991:2020, quarters = 4)
# write.csv(bits_ca, "data/DATRAS_exchange/bits_ca.csv")
bits_ca <- read.csv("data/DATRAS_exchange/bits_ca.csv")

# Read gear standardization data (semicolon-separated, latin1-encoded)
#sweep <- read.csv("data/from_ale/sweep_9116.csv", sep = ";", dec = ",", fileEncoding = "latin1")
sweep <- read.csv("data/from_ale/sweep_9118_ml.csv", sep = ";", fileEncoding = "latin1")

Standardize catch data

Standardize ships

# Before creating a new ID, make sure that countries and ship names use the same format
sort(unique(sweep$Ship))
#>  [1] "26HF" "ATL"  "ATLD" "BAL"  "BALL" "BPE"  "CEV"  "CLP"  "CLV"  "COML"
#> [11] "DAN2" "DANS" "DAR"  "GDY"  "HAF"  "KOH"  "KOOT" "MON"  "MONL" "SOL" 
#> [21] "SOL2" "VSH"  "ZBA"
sort(unique(bits_hh$Ship))
#>  [1] "06S1" "06SL" "26D4" "26HF" "26HI" "67BC" "77AR" "77SE" "AA36" "ESLF"
#> [11] "ESTM" "LTDA" "RUJB" "RUNT"
sort(unique(bits_hl$Ship))
#>  [1] "06S1" "06SL" "26D4" "26HF" "26HI" "67BC" "77AR" "77SE" "AA36" "ESLF"
#> [11] "ESTM" "LTDA" "RUJB" "RUNT"

# Change back to the old Ship name standard...
# https://vocab.ices.dk/?ref=315
# Assumptions:
# SOL is Solea on the ICES link above, and SOL1 is the older one of the two SOLs (1 and 2)
# DAN is Dana
# Exploratory checks used to pin down the vessel/country combinations:
# sweep %>% filter(Ship == "DANS") %>% distinct(Year, Country)
# sweep %>% filter(Ship == "DAN2") %>% distinct(Year)
# bits_hh %>% filter(Ship == "67BC") %>% distinct(Year, Country)
# bits_hh %>% filter(Ship == "26D4") %>% distinct(Year) # Strange that 26DF doesn't extend far back. Which ship did the Danes use? Ok, I have no Danish data that old.
# bits_hh %>% filter(Country == "DK") %>% distinct(Year)

# Recode exchange-format ship codes back to the (older) codes used in the
# sweep file, so the two sources can be matched on Ship3.
# NOTE(review): "67BC" appears twice below with different targets ("HAF" and
# "BAL") -- fct_recode applies only one of these mappings, so the other is
# silently lost. Confirm which vessel 67BC actually is; if it was used by
# several countries, it may need a country-based split like BAL/BALL below.
bits_hh <- bits_hh %>%
  mutate(Ship2 = fct_recode(Ship,
                            "SOL" = "06S1", 
                            "SOL2" = "06SL",
                            "DAN2" = "26D4",
                            "HAF" = "26HF",
                            "HAF" = "26HI",
                            "HAF" = "67BC",
                            "BAL" = "67BC",
                            "ARG" = "77AR",
                            "77SE" = "77SE",
                            "AA36" = "AA36",
                            "KOOT" = "ESLF",
                            "KOH" = "ESTM",
                            "DAR" = "LTDA",
                            "ATLD" = "RUJB",
                            "ATL" = "RUNT"), 
         Ship2 = as.character(Ship2)) %>% 
  # Latvia's "BAL" is a different vessel in the sweep file: call it "BALL"
  mutate(Ship3 = ifelse(Country == "LV" & Ship2 == "BAL", "BALL", Ship2))

# Same ship recode for the HL (length) data.
# NOTE(review): as above, "67BC" is mapped to both "HAF" and "BAL" -- only one
# of the two mappings takes effect; confirm the intended vessel.
bits_hl <- bits_hl %>%
  mutate(Ship2 = fct_recode(Ship,
                            "SOL" = "06S1", 
                            "SOL2" = "06SL",
                            "DAN2" = "26D4",
                            "HAF" = "26HF",
                            "HAF" = "26HI",
                            "HAF" = "67BC",
                            "BAL" = "67BC",
                            "ARG" = "77AR",
                            "77SE" = "77SE",
                            "AA36" = "AA36",
                            "KOOT" = "ESLF",
                            "KOH" = "ESTM",
                            "DAR" = "LTDA",
                            "ATLD" = "RUJB",
                            "ATL" = "RUNT"), 
         Ship2 = as.character(Ship2)) %>% 
  # Latvia's "BAL" is a different vessel in the sweep file: call it "BALL"
  mutate(Ship3 = ifelse(Country == "LV" & Ship2 == "BAL", "BALL", Ship2))

# Same ship recode for the CA (age/individual) data.
# NOTE(review): as above, "67BC" is mapped to both "HAF" and "BAL" -- only one
# of the two mappings takes effect; confirm the intended vessel.
bits_ca <- bits_ca %>%
  mutate(Ship2 = fct_recode(Ship,
                            "SOL" = "06S1", 
                            "SOL2" = "06SL",
                            "DAN2" = "26D4",
                            "HAF" = "26HF",
                            "HAF" = "26HI",
                            "HAF" = "67BC",
                            "BAL" = "67BC",
                            "ARG" = "77AR",
                            "77SE" = "77SE",
                            "AA36" = "AA36",
                            "KOOT" = "ESLF",
                            "KOH" = "ESTM",
                            "DAR" = "LTDA",
                            "ATLD" = "RUJB",
                            "ATL" = "RUNT"), 
         Ship2 = as.character(Ship2)) %>% 
  # Latvia's "BAL" is a different vessel in the sweep file: call it "BALL"
  mutate(Ship3 = ifelse(Country == "LV" & Ship2 == "BAL", "BALL", Ship2))

# Ok, which ships are missing in the exchange data?
unique(bits_hh$Ship3)[!unique(bits_hh$Ship3) %in% unique(sweep$Ship)]
#> [1] "ARG"  "AA36" "77SE"
# Swedish Ships and unidentified ships are NOT in the Sweep data

# BUG FIX: this check previously used `unique(sweep$Ship3)`, but `sweep` has
# no Ship3 column (only the bits_* tables do), so it returned NULL by
# construction and proved nothing. Compare the sweep file's own Ship codes:
unique(sweep$Ship)[!unique(sweep$Ship) %in% unique(bits_hh$Ship3)]
# If this comes back empty, all sweep ships are present in the exchange data

Standardize countries

# Now check which country codes are used: the sweep file uses 3-letter codes,
# DATRAS uses 2-letter codes
sort(unique(sweep$Country))
#> [1] "DEN" "EST" "GFR" "LAT" "LTU" "POL" "RUS" "SWE"
sort(unique(bits_hh$Country))
#> [1] "DE" "DK" "EE" "LT" "LV" "PL" "RU" "SE"

# Recode the DATRAS 2-letter country codes to the sweep file's 3-letter codes
# https://www.nationsonline.org/oneworld/country_code_list.htm#E
bits_hh <- bits_hh %>%
  mutate(Country = dplyr::recode(Country,
                                 "DK" = "DEN",
                                 "EE" = "EST",
                                 "DE" = "GFR",
                                 "LV" = "LAT",
                                 "LT" = "LTU",
                                 "PL" = "POL",
                                 "RU" = "RUS",
                                 "SE" = "SWE"))

# Same 2-letter -> 3-letter country recode for the HL data
bits_hl <- bits_hl %>%
  mutate(Country = dplyr::recode(Country,
                                 "DK" = "DEN",
                                 "EE" = "EST",
                                 "DE" = "GFR",
                                 "LV" = "LAT",
                                 "LT" = "LTU",
                                 "PL" = "POL",
                                 "RU" = "RUS",
                                 "SE" = "SWE"))

# Same 2-letter -> 3-letter country recode for the CA data
bits_ca <- bits_ca %>%
  mutate(Country = dplyr::recode(Country,
                                 "DK" = "DEN",
                                 "EE" = "EST",
                                 "DE" = "GFR",
                                 "LV" = "LAT",
                                 "LT" = "LTU",
                                 "PL" = "POL",
                                 "RU" = "RUS",
                                 "SE" = "SWE"))

# Gear? Are they the same?
sort(unique(bits_hh$Gear))
#>  [1] "ESB" "EXP" "FOT" "GOV" "GRT" "H20" "LBT" "PEL" "SON" "TVL" "TVS"
sort(unique(bits_hl$Gear))
#>  [1] "ESB" "EXP" "FOT" "GOV" "GRT" "H20" "LBT" "PEL" "SON" "TVL" "TVS"
sort(unique(sweep$Gear))
#>  [1] "CAM" "CHP" "DT"  "EGY" "ESB" "EXP" "GRT" "H20" "HAK" "LBT" "LPT" "P20"
#> [13] "PEL" "SON" "TVL" "TVS"

# Which gears are NOT in the sweep data? (These get their fishing-line
# values filled in manually further down.)
unique(bits_hl$Gear)[!unique(bits_hl$Gear) %in% unique(sweep$Gear)] 
#> [1] "FOT" "GOV"

Create a simple ID that works across all exchange data

# Create ID column
# IDx uses the RAW Ship code (not Ship2/Ship3), so it is identical across the
# HH/HL/CA files and can serve as the join key between them.
# NOTE: paste() turns NA fields into the literal string "NA" rather than
# propagating NA through the ID.
bits_ca <- bits_ca %>% 
  mutate(IDx = paste(Year, Quarter, Country, Ship, Gear, StNo, HaulNo, sep = "."))

bits_hl <- bits_hl %>% 
  mutate(IDx = paste(Year, Quarter, Country, Ship, Gear, StNo, HaulNo, sep = "."))

bits_hh <- bits_hh %>% 
  mutate(IDx = paste(Year, Quarter, Country, Ship, Gear, StNo, HaulNo, sep = "."))

# Works like a haul-id (one row per IDx in the HH data)
# bits_hh %>% group_by(IDx) %>% mutate(n = n()) %>% ungroup() %>% distinct(n)

Create the same unique haul-ID in the cpue data that I have in the sweep-file

# haul.id mirrors the sweep file's haul ID: it uses the standardized ship code
# (Ship3) and ":" as separator, unlike IDx which uses raw Ship and "."
bits_hl <- bits_hl %>% 
  mutate(haul.id = paste(Year, Quarter, Country, Ship3, Gear, StNo, HaulNo, sep = ":")) 

bits_hh <- bits_hh %>% 
  mutate(haul.id = paste(Year, Quarter, Country, Ship3, Gear, StNo, HaulNo, sep = ":")) 

Clean DATRAS EXCHANGE data

# Select just valid (V), additional (A) and no-oxygen (N) hauls
# (assumed HaulVal code meanings per the comment in the section header --
# confirm against the ICES HaulVal vocabulary)
bits_hh <- bits_hh %>%
  #filter(!Country == "SWE") %>% # I'll deal with Sweden later...
  filter(HaulVal %in% c("A","N","V"))

# Add ICES rectangle (e.g. "40G8") from the shoot position
bits_hh$Rect <- mapplots::ices.rect2(lon = bits_hh$ShootLong, lat = bits_hh$ShootLat)

# Add ICES subdivisions by point-in-polygon overlay of the shoot positions.
# NOTE(review): rewritten from the retired sp/rgdal stack (raster::shapefile +
# sp::over) to sf, which is already loaded. sf::st_intersects with the first
# match reproduces sp::over()'s behavior: the first containing polygon per
# point, NA when a point falls in no polygon. `shape` is now an sf object --
# if later (unseen) chunks use it as a Spatial* object, adapt them too.
shape <- sf::st_read("data/ICES_StatRec_mapto_ICES_Areas/StatRec_map_Areas_Full_20170124.shp",
                     quiet = TRUE)

# Shoot positions as sf points in the shapefile's CRS (as before, the
# coordinates are assumed to already be in that CRS)
pts <- sf::st_as_sf(data.frame(lon = bits_hh$ShootLong, lat = bits_hh$ShootLat),
                    coords = c("lon", "lat"), crs = sf::st_crs(shape))

hits <- sf::st_intersects(pts, shape)
first_hit <- vapply(hits, function(h) if (length(h) > 0) h[[1]] else NA_integer_,
                    integer(1))
bits_hh$SD <- shape$Area_27[first_hit]

# Rename subdivisions to the more common names and do some more filtering (by sub div and area)
sort(unique(bits_hh$SD))
#>  [1] "3.a.20"   "3.a.21"   "3.b.23"   "3.c.22"   "3.d.24"   "3.d.25"  
#>  [7] "3.d.26"   "3.d.27"   "3.d.28.1" "3.d.28.2" "3.d.29"

# NOTE: the "Unknown levels: 3.d.30" warning below is expected and benign --
# no hauls fall in SD 30 in this data, so that recode is simply skipped
# (the mapping is kept in case SD 30 appears in updated data).
bits_hh <- bits_hh %>% 
  mutate(SD = factor(SD),
         SD = fct_recode(SD,
                         "20" = "3.a.20",
                         "21" = "3.a.21",
                         "22" = "3.c.22",
                         "23" = "3.b.23",
                         "24" = "3.d.24",
                         "25" = "3.d.25",
                         "26" = "3.d.26",
                         "27" = "3.d.27",
                         "28" = "3.d.28.1",
                         "28" = "3.d.28.2",
                         "29" = "3.d.29",
                         "30" = "3.d.30"),
         SD = as.character(SD)) 
#> Warning: Problem with `mutate()` input `SD`.
#> ℹ Unknown levels in `f`: 3.d.30
#> ℹ Input `SD` is `fct_recode(...)`.
#> Warning: Unknown levels in `f`: 3.d.30

# Now add the fishing line information from the sweep file (we need it later
# to standardize based on gear geometry). We add it to the HH data and then
# transfer it to the other exchange data files when left_joining.
# NOTE(review): if any gear ever has more than one distinct Fishing.line in
# `sweep`, this join fans hauls out into duplicate rows. The commented checks
# below suggest this was verified -- re-check if the sweep file is updated.
fishing_line <- sweep %>%
  group_by(Gear) %>%
  distinct(Fishing.line) %>%
  ungroup()

# Join explicitly by Gear (previously relied on the implicit common-column
# join, which happened to resolve to Gear)
bits_hh <- left_join(bits_hh, fishing_line, by = "Gear")
# sweep %>% group_by(Gear) %>% distinct(Fishing.line)
# bits_hh %>% group_by(Gear) %>% distinct(Fishing.line)
bits_hh$Fishing.line <- as.numeric(bits_hh$Fishing.line)

# Which gears do not have a fishing line? Flag missing values with the
# sentinel -9 first, then inspect.
bits_hh$Fishing.line[is.na(bits_hh$Fishing.line)] <- -9
bits_hh %>% filter(Fishing.line == -9) %>% distinct(Gear)
#>   Gear
#> 1  FOT
#> 2  GOV
#> 3  ESB
#> 4  GRT

# FROM the index files (Orio)
# FOT has 83
# GOV has 160
# ESB ??
# GRT ??

# Add these values (ESB/GRT stay at -9 until a source is found):
bits_hh <- bits_hh %>%
  mutate(Fishing.line = case_when(Gear == "FOT" ~ 83,
                                  Gear == "GOV" ~ 160,
                                  TRUE ~ Fishing.line))

# Now select the hauls in the HH data when subsetting the HL data
# (drops HL rows belonging to hauls removed above, e.g. invalid HaulVal codes)
bits_hl <- bits_hl %>%
  filter(haul.id %in% bits_hh$haul.id)

# Match columns from the HH data to the HL and CA data
sort(unique(bits_hh$SD))
#>  [1] "20" "21" "22" "23" "24" "25" "26" "27" "28" "29"
sort(colnames(bits_hh))
#>  [1] "BotCurDir"         "BotCurSpeed"       "BotSal"           
#>  [4] "BotTemp"           "Buoyancy"          "BySpecRecCode"    
#>  [7] "CodendMesh"        "Country"           "DataType"         
#> [10] "DateofCalculation" "Day"               "DayNight"         
#> [13] "Depth"             "DepthStratum"      "Distance"         
#> [16] "DoorSpread"        "DoorSurface"       "DoorType"         
#> [19] "DoorWgt"           "Fishing.line"      "Gear"             
#> [22] "GearEx"            "GroundSpeed"       "haul.id"          
#> [25] "HaulDur"           "HaulLat"           "HaulLong"         
#> [28] "HaulNo"            "HaulVal"           "HydroStNo"        
#> [31] "IDx"               "KiteDim"           "MaxTrawlDepth"    
#> [34] "MinTrawlDepth"     "Month"             "Netopening"       
#> [37] "PelSampType"       "Quarter"           "RecordType"       
#> [40] "Rect"              "Rigging"           "SD"               
#> [43] "SecchiDepth"       "Ship"              "Ship2"            
#> [46] "Ship3"             "ShootLat"          "ShootLong"        
#> [49] "SpeedWater"        "StatRec"           "StdSpecRecCode"   
#> [52] "StNo"              "SurCurDir"         "SurCurSpeed"      
#> [55] "SurSal"            "SurTemp"           "Survey"           
#> [58] "SweepLngt"         "SwellDir"          "SwellHeight"      
#> [61] "ThClineDepth"      "ThermoCline"       "Tickler"          
#> [64] "TidePhase"         "TideSpeed"         "TimeShot"         
#> [67] "TowDir"            "Turbidity"         "WarpDen"          
#> [70] "Warpdia"           "Warplngt"          "WgtGroundRope"    
#> [73] "WindDir"           "WindSpeed"         "WingSpread"       
#> [76] "X"                 "Year"
# Columns to carry from the HH (haul) data into the HL and CA data
hh_cols <- c("SD", "Rect", "HaulVal", "StdSpecRecCode", "BySpecRecCode",
             "Fishing.line", "DataType", "HaulDur", "GroundSpeed", "haul.id",
             "IDx", "ShootLat", "ShootLong")

bits_hh_merge <- bits_hh %>%
  dplyr::select(all_of(hh_cols))

# HL already carries a haul.id built before the HH cleaning: drop it and take
# the HH version instead. CA never had one, so it joins directly.
bits_hl <- bits_hl %>%
  dplyr::select(-haul.id) %>%
  left_join(bits_hh_merge, by = "IDx")

bits_ca <- bits_ca %>%
  left_join(bits_hh_merge, by = "IDx")

# Keep only the study subdivisions in all three data sets
sd_keep <- c("24", "25", "26", "27", "28")
bits_hh <- bits_hh %>% filter(SD %in% sd_keep)
bits_hl <- bits_hl %>% filter(SD %in% sd_keep)
bits_ca <- bits_ca %>% filter(SD %in% sd_keep)

Filter species

# Subset the length data by species. Two SpecCode systems occur in DATRAS:
# 126436 / 127141 are WoRMS AphiaIDs (cf. the Valid_Aphia column in the HL
# data); 164712 / 172894 are presumably the matching TSN codes -- TODO confirm.
cod_codes <- c("126436", "164712")
fle_codes <- c("127141", "172894")

hlcod <- bits_hl %>%
  filter(SpecCode %in% cod_codes) %>%
  mutate(Species = "Gadus morhua")

hlfle <- bits_hl %>%
  filter(SpecCode %in% fle_codes) %>%
  mutate(Species = "Platichthys flesus")

Prepare to add 0 catches

# Find common columns in the HH and HL data (here already subset by species);
# these are the join keys used when adding zero-catch hauls below
comcol <- intersect(names(hlcod), names(bits_hh))

# What is the proportion of zero-catch hauls?
# Here we don't have zero catches -- HL rows only exist for hauls where the
# species was actually caught
hlcod %>%
  group_by(haul.id, Year) %>%
  summarise(CPUEun_haul = sum(HLNoAtLngt)) %>% 
  ungroup() %>% 
  mutate(zero_catch = ifelse(CPUEun_haul == 0, "Y", "N")) %>% 
  distinct(zero_catch)
#> # A tibble: 1 x 1
#>   zero_catch
#>   <chr>     
#> 1 N

# Cod: Add 0s and then remove lines with SpecVal = 0 (first NA because we don't have a match in the HH, then make them 0 later)
hlcod0 <- full_join(hlcod, bits_hh[, comcol], by = comcol)

# No zeroes yet: hauls without a cod record have NA, not 0
hlcod0 %>%
  group_by(haul.id, Year) %>%
  summarise(CPUEun_haul = sum(HLNoAtLngt)) %>% 
  ungroup() %>% 
  mutate(zero_catch = ifelse(CPUEun_haul == 0, "Y", "N")) %>% 
  distinct(zero_catch) 
#> # A tibble: 1 x 1
#>   zero_catch
#>   <lgl>     
#> 1 NA

# Hauls with no cod record get the sentinel SpecVal "zeroCatch"
hlcod0$SpecVal[is.na(hlcod0$SpecVal)] <- "zeroCatch"

hlcod0$SpecVal <- factor(hlcod0$SpecVal)

# Drop records flagged SpecVal 0
# NOTE(review): unlike the flounder block below, no StdSpecRecCode filter is
# applied to the zero rows here -- presumably because cod is recorded as a
# standard species in all hauls; confirm.
hlcod0 <-  hlcod0 %>% filter(!SpecVal == "0")

# Add species again after merge (zero rows came from HH and lack Species)
hlcod0$Species<-"Gadus morhua"


# Flounder: Add 0s, remove them if StdSpecRecCode !=1 and then remove lines with SpecVal = 0
hlfle0 <- full_join(hlfle, bits_hh[, comcol], by = comcol)

# Zero rows (no flounder record) are only kept for hauls where all standard
# species were recorded (StdSpecRecCode == 1)
hlfle0 <- hlfle0[!(is.na(hlfle0$Species) & hlfle0$StdSpecRecCode != 1),] 

# Hauls with no flounder record get the sentinel SpecVal "zeroCatch"
hlfle0$SpecVal[is.na(hlfle0$SpecVal)] <- "zeroCatch"
hlfle0$SpecVal <- factor(hlfle0$SpecVal)

# Drop records flagged SpecVal 0
hlfle0 <-  hlfle0 %>% filter(!SpecVal == "0")

# Re-add species after the merge (zero rows came from HH and lack Species)
hlfle0$Species<-"Platichthys flesus"

# Check number of hauls per species
hlcod0 %>% distinct(haul.id) %>% nrow()
#> [1] 4392
hlfle0 %>% distinct(haul.id) %>% nrow()
#> [1] 4373

Create (unstandardized) CPUE for SpecVal=1. If DataType=C then CPUEun=HLNoAtLngt; if DataType=R then CPUEun=HLNoAtLngt/(HaulDur/60); if DataType=S then CPUEun=(HLNoAtLngt*SubFactor)/(HaulDur/60). If SpecVal="zeroCatch" then CPUEun=0. If SpecVal=4 a decision is still needed, since those hauls have no length measurements, only total catch (such rows currently become NA). Note that here we also add zero CPUE if SpecVal=="zeroCatch".

Then, within each haul, I sum the CPUE of identical length classes that were recorded as separate rows (e.g. sampled with different subfactors or split by sex).

# Cod
# Unstandardized CPUE in numbers per hour (per haul for DataType C), following
# the rules in the text above. case_when replaces the hard-to-read nested
# ifelse(); rows matching none of the conditions (e.g. SpecVal 4) become NA,
# exactly as before.
hlcod0 <- hlcod0 %>%
  mutate(CPUEun = case_when(
    SpecVal == "1" & DataType == "C" ~ as.numeric(HLNoAtLngt),
    SpecVal == "1" & DataType == "R" ~ HLNoAtLngt / (HaulDur / 60),
    SpecVal == "1" & DataType == "S" ~ (HLNoAtLngt * SubFactor) / (HaulDur / 60),
    SpecVal == "zeroCatch" ~ 0,
    TRUE ~ NA_real_
  ))

# Number of hauls per year, coloured by whether the haul caught any cod
hlcod0 %>%
  group_by(haul.id, Year) %>%
  summarise(CPUEun_haul = sum(CPUEun)) %>%
  ungroup() %>%
  mutate(zero_catch = ifelse(CPUEun_haul == 0, "Y", "N")) %>%
  count(Year, zero_catch, name = "n") %>%
  ggplot(aes(x = Year, y = n, fill = zero_catch)) +
  geom_col()


# Some rows have multiple rows per combination of length class and haul id,
# so we need to sum it up 
hlcod0 %>% group_by(LngtClass, haul.id) %>% mutate(n = n()) %>% ungroup() %>% distinct(n)
#> # A tibble: 2 x 1
#>       n
#>   <int>
#> 1     1
#> 2     2
hlcod0 %>% group_by(LngtClass, haul.id) %>% mutate(n = n()) %>% ungroup() %>% filter(n == 2) %>% as.data.frame() %>% head(20)
#>         X RecordType Survey Quarter Country Ship Gear SweepLngt GearEx DoorType
#> 1   97185         HL   BITS       4     EST ESLF  TVS        NA   <NA>       NA
#> 2   97186         HL   BITS       4     EST ESLF  TVS        NA   <NA>       NA
#> 3   99360         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 4   99366         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 5  100061         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 6  100063         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 7  100064         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 8  100065         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 9  100066         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 10 100067         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 11 100068         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 12 100070         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 13 100072         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 14 100076         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 15 100078         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 16 100079         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 17 100081         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 18 100082         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 19 100084         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 20 100085         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#>    StNo HaulNo Year SpecCodeType SpecCode SpecVal Sex TotalNo CatIdentifier
#> 1    14     14 2000            T   164712       1   M       2             1
#> 2    14     14 2000            T   164712       1   F       4             1
#> 3    63     31 2001            T   164712       1   M       5             1
#> 4    63     31 2001            T   164712       1   F       7             1
#> 5    83     37 2001            T   164712       1   M      21             1
#> 6    83     37 2001            T   164712       1   M      21             1
#> 7    83     37 2001            T   164712       1   M      21             1
#> 8    83     37 2001            T   164712       1   M      21             1
#> 9    83     37 2001            T   164712       1   M      21             1
#> 10   83     37 2001            T   164712       1   M      21             1
#> 11   83     37 2001            T   164712       1   M      21             1
#> 12   83     37 2001            T   164712       1   M      21             1
#> 13   83     37 2001            T   164712       1   M      21             1
#> 14   83     37 2001            T   164712       1   F      42             1
#> 15   83     37 2001            T   164712       1   F      42             1
#> 16   83     37 2001            T   164712       1   F      42             1
#> 17   83     37 2001            T   164712       1   F      42             1
#> 18   83     37 2001            T   164712       1   F      42             1
#> 19   83     37 2001            T   164712       1   F      42             1
#> 20   83     37 2001            T   164712       1   F      42             1
#>    NoMeas SubFactor SubWgt CatCatchWgt LngtCode LngtClass HLNoAtLngt DevStage
#> 1       3         1     NA          32        1        35          2     <NA>
#> 2       3         1     NA          32        1        35          2     <NA>
#> 3       5         1     NA        3142        1        22          2     <NA>
#> 4       7         1     NA        3142        1        22          1     <NA>
#> 5      21         1     NA       64261        1        22          1     <NA>
#> 6      21         1     NA       64261        1        38          2     <NA>
#> 7      21         1     NA       64261        1        39          2     <NA>
#> 8      21         1     NA       64261        1        41          1     <NA>
#> 9      21         1     NA       64261        1        42          1     <NA>
#> 10     21         1     NA       64261        1        44          2     <NA>
#> 11     21         1     NA       64261        1        45          4     <NA>
#> 12     21         1     NA       64261        1        48          2     <NA>
#> 13     21         1     NA       64261        1        52          1     <NA>
#> 14     42         1     NA       64261        1        22          1     <NA>
#> 15     42         1     NA       64261        1        38          2     <NA>
#> 16     42         1     NA       64261        1        39          2     <NA>
#> 17     42         1     NA       64261        1        41          1     <NA>
#> 18     42         1     NA       64261        1        42          4     <NA>
#> 19     42         1     NA       64261        1        44          3     <NA>
#> 20     42         1     NA       64261        1        45          3     <NA>
#>    LenMeasType DateofCalculation Valid_Aphia Ship2 Ship3
#> 1           NA          20131112      126436  KOOT  KOOT
#> 2           NA          20131112      126436  KOOT  KOOT
#> 3           NA          20131113      126436  DAN2  DAN2
#> 4           NA          20131113      126436  DAN2  DAN2
#> 5           NA          20131113      126436  DAN2  DAN2
#> 6           NA          20131113      126436  DAN2  DAN2
#> 7           NA          20131113      126436  DAN2  DAN2
#> 8           NA          20131113      126436  DAN2  DAN2
#> 9           NA          20131113      126436  DAN2  DAN2
#> 10          NA          20131113      126436  DAN2  DAN2
#> 11          NA          20131113      126436  DAN2  DAN2
#> 12          NA          20131113      126436  DAN2  DAN2
#> 13          NA          20131113      126436  DAN2  DAN2
#> 14          NA          20131113      126436  DAN2  DAN2
#> 15          NA          20131113      126436  DAN2  DAN2
#> 16          NA          20131113      126436  DAN2  DAN2
#> 17          NA          20131113      126436  DAN2  DAN2
#> 18          NA          20131113      126436  DAN2  DAN2
#> 19          NA          20131113      126436  DAN2  DAN2
#> 20          NA          20131113      126436  DAN2  DAN2
#>                          IDx SD Rect HaulVal StdSpecRecCode BySpecRecCode
#> 1  2000.4.EST.ESLF.TVS.14.14 28 45H1       V              1             1
#> 2  2000.4.EST.ESLF.TVS.14.14 28 45H1       V              1             1
#> 3  2001.4.DEN.26D4.TVL.63.31 26 39G8       V              1             1
#> 4  2001.4.DEN.26D4.TVL.63.31 26 39G8       V              1             1
#> 5  2001.4.DEN.26D4.TVL.83.37 26 40G8       V              1             1
#> 6  2001.4.DEN.26D4.TVL.83.37 26 40G8       V              1             1
#> 7  2001.4.DEN.26D4.TVL.83.37 26 40G8       V              1             1
#> 8  2001.4.DEN.26D4.TVL.83.37 26 40G8       V              1             1
#> 9  2001.4.DEN.26D4.TVL.83.37 26 40G8       V              1             1
#> 10 2001.4.DEN.26D4.TVL.83.37 26 40G8       V              1             1
#> 11 2001.4.DEN.26D4.TVL.83.37 26 40G8       V              1             1
#> 12 2001.4.DEN.26D4.TVL.83.37 26 40G8       V              1             1
#> 13 2001.4.DEN.26D4.TVL.83.37 26 40G8       V              1             1
#> 14 2001.4.DEN.26D4.TVL.83.37 26 40G8       V              1             1
#> 15 2001.4.DEN.26D4.TVL.83.37 26 40G8       V              1             1
#> 16 2001.4.DEN.26D4.TVL.83.37 26 40G8       V              1             1
#> 17 2001.4.DEN.26D4.TVL.83.37 26 40G8       V              1             1
#> 18 2001.4.DEN.26D4.TVL.83.37 26 40G8       V              1             1
#> 19 2001.4.DEN.26D4.TVL.83.37 26 40G8       V              1             1
#> 20 2001.4.DEN.26D4.TVL.83.37 26 40G8       V              1             1
#>    Fishing.line DataType HaulDur GroundSpeed                   haul.id ShootLat
#> 1         33.22        C      30         3.0 2000:4:EST:KOOT:TVS:14:14  58.0167
#> 2         33.22        C      30         3.0 2000:4:EST:KOOT:TVS:14:14  58.0167
#> 3         63.46        R      30         3.0 2001:4:DEN:DAN2:TVL:63:31  55.4699
#> 4         63.46        R      30         3.0 2001:4:DEN:DAN2:TVL:63:31  55.4699
#> 5         63.46        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37  55.6627
#> 6         63.46        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37  55.6627
#> 7         63.46        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37  55.6627
#> 8         63.46        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37  55.6627
#> 9         63.46        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37  55.6627
#> 10        63.46        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37  55.6627
#> 11        63.46        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37  55.6627
#> 12        63.46        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37  55.6627
#> 13        63.46        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37  55.6627
#> 14        63.46        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37  55.6627
#> 15        63.46        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37  55.6627
#> 16        63.46        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37  55.6627
#> 17        63.46        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37  55.6627
#> 18        63.46        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37  55.6627
#> 19        63.46        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37  55.6627
#> 20        63.46        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37  55.6627
#>    ShootLong      Species   CPUEun n
#> 1    21.0833 Gadus morhua 2.000000 2
#> 2    21.0833 Gadus morhua 2.000000 2
#> 3    18.3116 Gadus morhua 4.000000 2
#> 4    18.3116 Gadus morhua 2.000000 2
#> 5    18.0414 Gadus morhua 1.935484 2
#> 6    18.0414 Gadus morhua 3.870968 2
#> 7    18.0414 Gadus morhua 3.870968 2
#> 8    18.0414 Gadus morhua 1.935484 2
#> 9    18.0414 Gadus morhua 1.935484 2
#> 10   18.0414 Gadus morhua 3.870968 2
#> 11   18.0414 Gadus morhua 7.741935 2
#> 12   18.0414 Gadus morhua 3.870968 2
#> 13   18.0414 Gadus morhua 1.935484 2
#> 14   18.0414 Gadus morhua 1.935484 2
#> 15   18.0414 Gadus morhua 3.870968 2
#> 16   18.0414 Gadus morhua 3.870968 2
#> 17   18.0414 Gadus morhua 1.935484 2
#> 18   18.0414 Gadus morhua 7.741935 2
#> 19   18.0414 Gadus morhua 5.806452 2
#> 20   18.0414 Gadus morhua 5.806452 2
# Grab an example haul with duplicated length-class rows (sex-split and/or
# re-subsampled catches, as in the printout above) for the spot check below
test <- hlcod0 %>% group_by(LngtClass, haul.id) %>% mutate(n = n()) %>% ungroup() %>% filter(n == 2)
test_id <- test$haul.id[2]

# Collapse to one row per haul and length class: sum CPUE over duplicate rows
# (e.g. sexes recorded separately) and drop the csv row-index column X.
# NOTE(review): distinct(.keep_all = TRUE) keeps the FIRST duplicate row's
# metadata -- columns that differ between the duplicates (Sex, TotalNo,
# NoMeas, per the printout above) are therefore kept arbitrarily.
hlcodL <- hlcod0 %>% 
  group_by(LngtClass, haul.id) %>% 
  mutate(CPUEun = sum(CPUEun)) %>%
  ungroup() %>% 
  mutate(id3 = paste(haul.id, LngtClass)) %>% 
  distinct(id3, .keep_all = TRUE) %>% 
  dplyr::select(-X, -id3) # Clean up a bit

# Check with an ID
# filter(hlcod0, haul.id == test_id)
# filter(hlcodL, haul.id == test_id) %>% as.data.frame()

# Do we still have 0 catches after collapsing the length classes?
hlcodL %>%
  group_by(haul.id, Year) %>%
  summarise(CPUEun_haul = sum(CPUEun)) %>%
  ungroup() %>%
  mutate(zero_catch = ifelse(CPUEun_haul == 0, "Y", "N")) %>%
  count(Year, zero_catch, name = "n") %>%
  ggplot(aes(x = Year, y = n, fill = zero_catch)) +
  geom_col()


# Flounder
# Unstandardized CPUE, same rules as for cod: numbers per hour (per haul for
# DataType C); unmatched rows (e.g. SpecVal 4) become NA, exactly as before.
hlfle0 <- hlfle0 %>%
  mutate(CPUEun = case_when(
    SpecVal == "1" & DataType == "C" ~ as.numeric(HLNoAtLngt),
    SpecVal == "1" & DataType == "R" ~ HLNoAtLngt / (HaulDur / 60),
    SpecVal == "1" & DataType == "S" ~ (HLNoAtLngt * SubFactor) / (HaulDur / 60),
    SpecVal == "zeroCatch" ~ 0,
    TRUE ~ NA_real_
  ))

# Sum up the CPUEs if there are multiple rows per length class and haul
# (sex-split / re-subsampled rows); the first duplicate row's metadata is
# kept, and the csv row-index column X is dropped
hlfleL <- hlfle0 %>% 
  group_by(LngtClass, haul.id) %>% 
  mutate(CPUEun = sum(CPUEun)) %>%
  ungroup() %>% 
  mutate(id3 = paste(haul.id, LngtClass)) %>% 
  distinct(id3, .keep_all = TRUE) %>% 
  dplyr::select(-X, -id3)

Get and add annual weight-length relationships from the CA data for both cod and flounder so that I can calculate CPUE in biomass rather than numbers further down

# Cod
bits_ca_cod <- bits_ca %>% 
  filter(SpecCode %in% c("164712", "126436") & Year < 2020) %>% 
  mutate(StNo = as.numeric(StNo)) %>% 
  mutate(Species = "Cod") %>% 
  mutate(ID = paste(Year, Quarter, Country, Ship, Gear, StNo, HaulNo, sep = "."))
#> Warning: Problem with `mutate()` input `StNo`.
#> ℹ NAs introduced by coercion
#> ℹ Input `StNo` is `as.numeric(StNo)`.
#> Warning in mask$eval_all_mutate(dots[[i]]): NAs introduced by coercion

# Now I need to copy rows with NoAtLngt > 1 so that 1 row = 1 ind
# First make a small test
# nrow(bits_ca_cod)
# test_id <- head(filter(bits_ca_cod, CANoAtLngt == 5))$ID[1]
# filter(bits_ca_cod, ID == test_id & CANoAtLngt == 5)

bits_ca_cod <- bits_ca_cod %>% map_df(., rep, .$CANoAtLngt)

# head(data.frame(filter(bits_ca_cod, ID == test_id & CANoAtLngt == 5)), 20)
# nrow(bits_ca_cod)
# Looks ok!

# Standardize length and drop NA weights (need that for condition)
# Keep only individuals with positive, non-missing weight and length, convert
# weight to kg, and convert mm-coded lengths to cm.
bits_ca_cod <- bits_ca_cod %>% 
  drop_na(IndWgt) %>% 
  drop_na(LngtClass) %>% 
  filter(IndWgt > 0 & LngtClass > 0) %>%  # Filter positive length and weight
  mutate(weight_kg = IndWgt/1000) %>% 
  # NOTE(review): only LngtCode "." is treated as mm here, whereas the haul
  # data below also treats "0" as mm — confirm "0" does not occur in the CA data
  mutate(length_cm = ifelse(LngtCode == ".", 
                            LngtClass/10,
                            LngtClass)) # Standardize length ((https://vocab.ices.dk/?ref=18))

# Quick visual check of the weight-length data per year
bits_ca_cod %>%
  ggplot(aes(IndWgt, length_cm)) +
  geom_point() +
  facet_wrap(~Year)


# Now extract the coefficients for each year (not bothering with outliers at the moment)
# Fit the annual log-log length-weight regressions ONCE and derive both the
# back-transformed intercept (a) and the slope (b) from the same fits,
# instead of refitting every model twice as before.
cod_lw <- bits_ca_cod %>%
  split(.$Year) %>%
  purrr::map(~lm(log(IndWgt) ~ log(length_cm), data = .x)) %>%
  purrr::map_df(broom::tidy, .id = 'Year') %>%
  mutate(Year = as.integer(Year))

# a = exp(intercept) of the log-log model
cod_intercept <- cod_lw %>%
  filter(term == "(Intercept)") %>% 
  mutate(a = exp(estimate)) %>% 
  dplyr::select(Year, a)

# b = allometric exponent (slope of the log-log model)
cod_slope <- cod_lw %>%
  filter(term == "log(length_cm)") %>% 
  rename("b" = "estimate") %>% 
  dplyr::select(Year, b)

# Flounder: both flounder species codes, years before 2020, plus a haul ID
bits_ca_fle <- bits_ca %>%
  filter(Year < 2020, SpecCode %in% c("127141", "172894")) %>%
  mutate(StNo = as.numeric(StNo),  # coercion warnings expected (see below)
         Species = "Flounder",
         ID = paste(Year, Quarter, Country, Ship, Gear, StNo, HaulNo, sep = "."))
#> Warning: Problem with `mutate()` input `StNo`.
#> ℹ NAs introduced by coercion
#> ℹ Input `StNo` is `as.numeric(StNo)`.

#> Warning: NAs introduced by coercion

# Expand to one row per individual (see the cod step above); uncount() is the
# idiomatic replacement for map_df(rep, ...), keeping the weights column
bits_ca_fle <- bits_ca_fle %>% uncount(CANoAtLngt, .remove = FALSE)

# Standardize length and drop NA weights (need that for condition)
# Same cleaning as for cod, plus two flounder-specific steps: drop the
# mm-coded 2008 records and cap length at 70 cm.
bits_ca_fle <- bits_ca_fle %>% 
  drop_na(IndWgt) %>% 
  drop_na(LngtClass) %>% 
  filter(IndWgt > 0 & LngtClass > 0) %>%  # Filter positive length and weight
  mutate(weight_kg = IndWgt/1000) %>% 
  mutate(length_cm = ifelse(LngtCode == ".", 
                            LngtClass/10,
                            LngtClass)) %>% # Standardize length ((https://vocab.ices.dk/?ref=18))
  # Drop the "."-coded 2008 rows (presumably problematic that year — see the
  # LngtCode-colored plot below) and implausibly long flounder
  mutate(keep = ifelse(LngtCode == "." & Year == 2008, "N", "Y")) %>%
  filter(keep == "Y") %>% 
  filter(length_cm < 70)

# Visual check of the flounder weight-length data, colored by length code
bits_ca_fle %>%
  ggplot(aes(IndWgt, length_cm, color = LngtCode)) +
  geom_point() +
  facet_wrap(~Year)


# Now extract the coefficients for each year (not bothering with outliers at the moment)
# As for cod: fit the annual log-log regressions once and derive a and b
# from the same set of fits (previously the models were fit twice).
fle_lw <- bits_ca_fle %>%
  split(.$Year) %>%
  purrr::map(~lm(log(IndWgt) ~ log(length_cm), data = .x)) %>%
  purrr::map_df(broom::tidy, .id = 'Year') %>%
  mutate(Year = as.integer(Year))

# a = exp(intercept) of the log-log model
fle_intercept <- fle_lw %>%
  filter(term == "(Intercept)") %>% 
  mutate(a = exp(estimate)) %>% 
  dplyr::select(Year, a)

# b = allometric exponent (slope of the log-log model)
fle_slope <- fle_lw %>%
  filter(term == "log(length_cm)") %>% 
  rename("b" = "estimate") %>% 
  dplyr::select(Year, b)

Join the annual L-W relationships to the respective catch data to calculate CPUE in biomass, not abundance.

# These are the haul-data: hlcodL (cod) and hlfleL (flounder).
# Attach the annual length-weight coefficients (a, b) by year.
hlcodL <- hlcodL %>%
  left_join(cod_intercept, by = "Year") %>%
  left_join(cod_slope, by = "Year")

hlfleL <- hlfleL %>%
  left_join(fle_intercept, by = "Year") %>%
  left_join(fle_slope, by = "Year")

Convert from CPUE in numbers to kg

# First standardize length to cm and then check how zero-catches are implemented at this stage
# FIX: treat LngtCode "0" as mm-coded as well, like the flounder step further
# down already does — both "." and "0" report lengths in mm
# (https://vocab.ices.dk/?ref=18); previously only "." was divided by 10.
hlcodL <- hlcodL %>% 
  mutate(length_cm = ifelse(LngtCode %in% c(".", "0"), 
                            LngtClass/10,
                            LngtClass))
  
# Sanity check: there should be no rows with a zero length
filter(hlcodL, length_cm == 0) # No such thing
#> # A tibble: 0 x 49
#> # … with 49 variables: RecordType <chr>, Survey <chr>, Quarter <int>,
#> #   Country <chr>, Ship <chr>, Gear <chr>, SweepLngt <int>, GearEx <chr>,
#> #   DoorType <lgl>, StNo <chr>, HaulNo <int>, Year <int>, SpecCodeType <chr>,
#> #   SpecCode <int>, SpecVal <fct>, Sex <chr>, TotalNo <dbl>,
#> #   CatIdentifier <int>, NoMeas <int>, SubFactor <dbl>, SubWgt <int>,
#> #   CatCatchWgt <int>, LngtCode <chr>, LngtClass <int>, HLNoAtLngt <dbl>,
#> #   DevStage <chr>, LenMeasType <int>, DateofCalculation <int>,
#> #   Valid_Aphia <int>, Ship2 <chr>, Ship3 <chr>, IDx <chr>, SD <chr>,
#> #   Rect <chr>, HaulVal <chr>, StdSpecRecCode <int>, BySpecRecCode <int>,
#> #   Fishing.line <dbl>, DataType <chr>, HaulDur <int>, GroundSpeed <dbl>,
#> #   haul.id <chr>, ShootLat <dbl>, ShootLong <dbl>, Species <chr>,
#> #   CPUEun <dbl>, a <dbl>, b <dbl>, length_cm <dbl>

# Now check if all rows where length is NA are the ones with zero catch!
# (NA lengths are recoded to -9 purely for plotting, so they show up as a
# separate facet rather than being silently dropped)
hlcodL %>% 
  mutate(length2 = replace_na(length_cm, -9),
         no_length = ifelse(length2 < 0, "T", "F")) %>% 
  ggplot(., aes(length2, CPUEun, color = no_length)) + geom_point(alpha = 0.2) + facet_wrap(~no_length)


# Right, so all hauls with zero catch have NA length_cm. I don't have any NA catches
# (t is a throwaway object used only to eyeball row counts interactively)
t <- hlcodL %>% drop_na(CPUEun)
t <- hlcodL %>% filter(CPUEun == 0)
t <- hlcodL %>% drop_na(length_cm)

# In other words, a zero catch is when the catch is zero and length_cm is NA
# In order to not get any NA CPUEs in unit biomass because length is NA (I want them instead
# to be 0, as the numbers-CPUE is), I will replace length_cm == NA with length_cm == 0 before
# calculating biomass cpue
hlcodL <- hlcodL %>% mutate(length_cm2 = replace_na(length_cm, 0))

# Calculate individual weight from the annual L-W coefficients and convert
# the numbers-CPUE to a biomass-CPUE (kg); zero-catch rows get weight 0
hlcodL <- hlcodL %>%
  mutate(weight_kg = (a*length_cm2^b)/1000,
         CPUEun_kg = weight_kg*CPUEun)

# Plot and check it's correct also in this data
hlcodL %>%
  ggplot(aes(weight_kg, length_cm2)) +
  geom_point() +
  facet_wrap(~Year)



# Now do the same for flounder
# Standardize length to cm: codes "." and "0" both report lengths in mm
# (https://vocab.ices.dk/?ref=18)
hlfleL <- hlfleL %>%
  mutate(length_cm = ifelse(LngtCode %in% c("0", "."),
                            LngtClass/10,
                            LngtClass))

# Sanity check: there should be no rows with a zero length
filter(hlfleL, length_cm == 0) # No such thing
#> # A tibble: 0 x 49
#> # … with 49 variables: RecordType <chr>, Survey <chr>, Quarter <int>,
#> #   Country <chr>, Ship <chr>, Gear <chr>, SweepLngt <int>, GearEx <chr>,
#> #   DoorType <lgl>, StNo <chr>, HaulNo <int>, Year <int>, SpecCodeType <chr>,
#> #   SpecCode <int>, SpecVal <fct>, Sex <chr>, TotalNo <dbl>,
#> #   CatIdentifier <int>, NoMeas <int>, SubFactor <dbl>, SubWgt <int>,
#> #   CatCatchWgt <int>, LngtCode <chr>, LngtClass <int>, HLNoAtLngt <dbl>,
#> #   DevStage <chr>, LenMeasType <int>, DateofCalculation <int>,
#> #   Valid_Aphia <int>, Ship2 <chr>, Ship3 <chr>, IDx <chr>, SD <chr>,
#> #   Rect <chr>, HaulVal <chr>, StdSpecRecCode <int>, BySpecRecCode <int>,
#> #   Fishing.line <dbl>, DataType <chr>, HaulDur <int>, GroundSpeed <dbl>,
#> #   haul.id <chr>, ShootLat <dbl>, ShootLong <dbl>, Species <chr>,
#> #   CPUEun <dbl>, a <dbl>, b <dbl>, length_cm <dbl>

# NOTE(review): this chunk repeats, verbatim, the bits_ca_fle cleaning already
# run further up (drop NAs, positive filters, length standardization, 2008
# "."-code exclusion, < 70 cm cap). Re-running it on already-cleaned data
# changes nothing, but the duplication looks unintentional — consider removing.
bits_ca_fle <- bits_ca_fle %>% 
  drop_na(IndWgt) %>% 
  drop_na(LngtClass) %>% 
  filter(IndWgt > 0 & LngtClass > 0) %>%  # Filter positive length and weight
  mutate(weight_kg = IndWgt/1000) %>% 
  mutate(length_cm = ifelse(LngtCode == ".", 
                            LngtClass/10,
                            LngtClass)) %>% # Standardize length ((https://vocab.ices.dk/?ref=18))
  mutate(keep = ifelse(LngtCode == "." & Year == 2008, "N", "Y")) %>%
  filter(keep == "Y") %>% 
  filter(length_cm < 70)

# Now check if all rows where length is NA are the ones with zero catch!
# (same diagnostic as for cod: NA lengths recoded to -9 for plotting)
hlfleL %>% 
  mutate(length2 = replace_na(length_cm, -9),
         no_length = ifelse(length2 < 0, "T", "F")) %>% 
  ggplot(., aes(length2, CPUEun, color = no_length)) + geom_point(alpha = 0.2) + facet_wrap(~no_length)
#> Warning: Removed 11 rows containing missing values (geom_point).


# List the distinct CPUE values per length; -9 (= NA length) should only
# co-occur with CPUEun == 0
hlfleL %>% mutate(length2 = replace_na(length_cm, -9)) %>% group_by(length2) %>% distinct(CPUEun) %>% arrange(CPUEun)
#> # A tibble: 5,356 x 2
#> # Groups:   length2 [210]
#>    CPUEun length2
#>     <dbl>   <dbl>
#>  1      0      -9
#>  2      1      22
#>  3      1      24
#>  4      1      27
#>  5      1      29
#>  6      1      34
#>  7      1      37
#>  8      1      16
#>  9      1      17
#> 10      1      38
#> # … with 5,346 more rows

# Right, so all hauls with zero catch have NA length_cm. I don't have any NA catches
# (t is a throwaway object for interactive row-count checks)
t <- hlfleL %>% drop_na(CPUEun)
# Well, 11 rows. I will remove them
hlfleL <- hlfleL %>% drop_na(CPUEun)
t <- hlfleL %>% filter(CPUEun == 0)
t <- hlfleL %>% drop_na(length_cm)

# A zero catch is a row where the catch is zero and length_cm is NA. Replace
# the NA lengths with 0 first so the biomass CPUE comes out as 0 (not NA) for
# zero-catch rows, matching the numbers-CPUE; then compute individual weight
# from the annual L-W coefficients and the biomass-CPUE (kg).
hlfleL <- hlfleL %>%
  mutate(length_cm2 = replace_na(length_cm, 0),
         weight_kg = (a*length_cm2^b)/1000,
         CPUEun_kg = weight_kg*CPUEun)

# Plot and check it's correct also in this data
ggplot(hlfleL, aes(weight_kg, length_cm2)) +
  geom_point() + 
  facet_wrap(~Year)
#> Warning: Removed 344 rows containing missing values (geom_point).


# Check (throwaway objects, inspected interactively)
t <- hlfleL %>% drop_na(CPUEun_kg) # Should not have any NA in biomass-catch
t <- hlfleL %>% filter(CPUEun_kg == 0) # Should result in a few percent of rows (note this is not proportion of hauls, but rows)
t <- hlfleL %>% drop_na(length_cm2) # Should be no NA

# What is the proportion of zero-catch hauls?
cod_0plot <- hlcodL %>%
  group_by(haul.id, Year) %>%
  summarise(CPUEun_haul = sum(CPUEun)) %>% 
  ungroup() %>% 
  mutate(zero_catch = ifelse(CPUEun_haul == 0, "Y", "N")) %>% 
  group_by(Year, zero_catch) %>% 
  summarise(n = n()) %>% 
  ungroup() %>% 
  # FIX: values_fill = 0 so a year with no zero-catch (or no positive) hauls
  # gets a 0 count instead of NA — previously the proportion became NA and the
  # bar was silently dropped ("Removed 1 rows" warning)
  pivot_wider(names_from = zero_catch, values_from = n, values_fill = 0) %>% 
  mutate(prop_zero_catch_hauls = Y/(N+Y)) %>% 
  ggplot(., aes(Year, prop_zero_catch_hauls)) + geom_bar(stat = "identity") + 
  coord_cartesian(expand = 0, ylim = c(0, 1)) + 
  ggtitle("Cod")

# How many zero-catch hauls?
fle_0plot <- hlfleL %>%
  group_by(haul.id, Year) %>%
  summarise(CPUEun_haul = sum(CPUEun)) %>% 
  ungroup() %>% 
  mutate(zero_catch = ifelse(CPUEun_haul == 0, "Y", "N")) %>% 
  group_by(Year, zero_catch) %>% 
  summarise(n = n()) %>% 
  ungroup() %>% 
  # FIX: values_fill = 0 avoids NA proportions for years where one of the
  # categories is absent (see the cod plot above)
  pivot_wider(names_from = zero_catch, values_from = n, values_fill = 0) %>% 
  mutate(prop_zero_catch_hauls = Y/(N+Y)) %>% 
  ggplot(., aes(Year, prop_zero_catch_hauls)) + geom_bar(stat = "identity") + 
  coord_cartesian(expand = 0, ylim = c(0, 1)) + 
  ggtitle("Flounder")

cod_0plot / fle_0plot
#> Warning: Removed 1 rows containing missing values (position_stack).
#> Warning: Removed 1 rows containing missing values (position_stack).

Standardize according to Orio

Target unit: kg of fish caught by trawling for 1 h over a standard bottom swept area of 0.45 km^2, using a TVL trawl with 75 m sweeps at the standard speed of three knots.

# Remove hauls done with the TVL gear with a SweepLngt < 50 (these are
# calibration hauls, pers. com. Anders & Ale); NA sweep lengths are treated
# as 50 so those hauls are kept. Also remove the pelagic gear.
hlcodL <- hlcodL %>%
  filter(Gear != "PEL") %>%
  mutate(SweepLngt2 = replace_na(SweepLngt, 50)) %>%
  filter(!(Gear == "TVL" & SweepLngt2 < 50)) %>%
  dplyr::select(-SweepLngt2)
  
# Same haul filtering for flounder: drop TVL calibration hauls
# (SweepLngt < 50, NAs treated as 50) and the pelagic gear
hlfleL <- hlfleL %>%
  filter(Gear != "PEL") %>%
  mutate(SweepLngt2 = replace_na(SweepLngt, 50)) %>%
  filter(!(Gear == "TVL" & SweepLngt2 < 50)) %>%
  dplyr::select(-SweepLngt2)

# Add in RS and RSA-values from the sweep file
# CPUE should be multiplied with RS and RSA to standardize to a relative speed and gear dimension.
# There is not a single file with all RS and RSA values. Instead they come in three files:
# - sweep (non-Swedish hauls between 1991-2016)
# - + calculated based on trawl speed and gear dimensions.
# I will join in the RS and RSA values from all sources, then standardize and filter
# away non-standardized hauls
# sort(unique(sweep$Year))
# sort(unique(sweep$Country))

# Since I don't have the sweep data for Swedish data, I have to calculate it from scratch using the 
# equation in Orio's spreadsheet

# First I will join in the sweep data: keep only the haul ID and the two
# correction factors.
# NOTE(review): the "ï.." prefix on haul.id is a UTF-8 BOM artifact from
# read.csv(); reading the file with fileEncoding = "UTF-8-BOM" would avoid it.
sweep_sel <- sweep %>% dplyr::select(haul.id = "ï..haul.id", RSA, RS)

# Join the sweep-file correction factors onto the haul data.
# FIX: make the join key explicit (previously a natural join) so a future
# shared column can never silently change the join.
hlcodL2 <- left_join(hlcodL, sweep_sel, by = "haul.id")
hlfleL2 <- left_join(hlfleL, sweep_sel, by = "haul.id")

# Keep the sweep-file values apart from the ones calculated below, and coerce
# to numeric (in case the sweep columns were read as character)
hlcodL2 <- hlcodL2 %>%
  rename("RS_sweep" = "RS",
         "RSA_sweep" = "RSA") %>% 
  mutate(RS_sweep = as.numeric(RS_sweep),
         RSA_sweep = as.numeric(RSA_sweep))

hlfleL2 <- hlfleL2 %>%
  rename("RS_sweep" = "RS",
         "RSA_sweep" = "RSA") %>% 
  mutate(RS_sweep = as.numeric(RS_sweep),
         RSA_sweep = as.numeric(RSA_sweep))

# Inspect the column set after the join/rename
sort(colnames(hlcodL2))
#>  [1] "a"                 "b"                 "BySpecRecCode"    
#>  [4] "CatCatchWgt"       "CatIdentifier"     "Country"          
#>  [7] "CPUEun"            "CPUEun_kg"         "DataType"         
#> [10] "DateofCalculation" "DevStage"          "DoorType"         
#> [13] "Fishing.line"      "Gear"              "GearEx"           
#> [16] "GroundSpeed"       "haul.id"           "HaulDur"          
#> [19] "HaulNo"            "HaulVal"           "HLNoAtLngt"       
#> [22] "IDx"               "length_cm"         "length_cm2"       
#> [25] "LenMeasType"       "LngtClass"         "LngtCode"         
#> [28] "NoMeas"            "Quarter"           "RecordType"       
#> [31] "Rect"              "RS_sweep"          "RSA_sweep"        
#> [34] "SD"                "Sex"               "Ship"             
#> [37] "Ship2"             "Ship3"             "ShootLat"         
#> [40] "ShootLong"         "SpecCode"          "SpecCodeType"     
#> [43] "Species"           "SpecVal"           "StdSpecRecCode"   
#> [46] "StNo"              "SubFactor"         "SubWgt"           
#> [49] "Survey"            "SweepLngt"         "TotalNo"          
#> [52] "Valid_Aphia"       "weight_kg"         "Year"
# Same column-set check for flounder
sort(colnames(hlfleL2))
#>  [1] "a"                 "b"                 "BySpecRecCode"    
#>  [4] "CatCatchWgt"       "CatIdentifier"     "Country"          
#>  [7] "CPUEun"            "CPUEun_kg"         "DataType"         
#> [10] "DateofCalculation" "DevStage"          "DoorType"         
#> [13] "Fishing.line"      "Gear"              "GearEx"           
#> [16] "GroundSpeed"       "haul.id"           "HaulDur"          
#> [19] "HaulNo"            "HaulVal"           "HLNoAtLngt"       
#> [22] "IDx"               "length_cm"         "length_cm2"       
#> [25] "LenMeasType"       "LngtClass"         "LngtCode"         
#> [28] "NoMeas"            "Quarter"           "RecordType"       
#> [31] "Rect"              "RS_sweep"          "RSA_sweep"        
#> [34] "SD"                "Sex"               "Ship"             
#> [37] "Ship2"             "Ship3"             "ShootLat"         
#> [40] "ShootLong"         "SpecCode"          "SpecCodeType"     
#> [43] "Species"           "SpecVal"           "StdSpecRecCode"   
#> [46] "StNo"              "SubFactor"         "SubWgt"           
#> [49] "Survey"            "SweepLngt"         "TotalNo"          
#> [52] "Valid_Aphia"       "weight_kg"         "Year"

# I will calculate a RS and RSA column in the catch data based on Ale's equation in the sweep file:
# Inspect the inputs first; -9 is the ICES missing-value code
sort(unique(hlcodL2$GroundSpeed))
#>  [1] -9.0  0.8  1.7  1.8  2.0  2.1  2.2  2.4  2.5  2.6  2.7  2.8  2.9  3.0  3.1
#> [16]  3.2  3.3  3.4  3.5  3.6  3.7  3.8  3.9  4.0  4.1  4.2  4.3  4.4  4.5  4.6
#> [31]  4.7  4.9  5.2  5.3  5.4  5.6  5.7  5.9  6.0  6.2  6.3  6.7  6.9  7.1  7.3
sort(unique(hlcodL2$Fishing.line))
#> [1]  -9.00  28.00  33.22  36.00  63.46  83.00 160.00
sort(unique(hlcodL2$SweepLngt))
#>  [1]  40  50  75  87  95 100 180 182 185 188 203 225

# Replace the ICES missing-value code -9 with real NAs in the columns used
# for the correction-factor calculations, so missing inputs cannot produce
# plausible-looking but wrong numbers
hlcodL2 <- hlcodL2 %>%
  mutate(across(c(GroundSpeed, Fishing.line, SweepLngt), ~na_if(.x, -9)))

hlfleL2 <- hlfleL2 %>%
  mutate(across(c(GroundSpeed, Fishing.line, SweepLngt), ~na_if(.x, -9)))

# Now calculate correction factors
# RS_x  : speed standardization = 3 knots / observed ground speed
# RSA_x : swept-area standardization = standard swept area / this haul's swept area
# Constants (from Ale/Orio's spreadsheet — TODO confirm):
#   0.67             : horizontal opening as a fraction of the fishing line
#   0.258819045      : sin(15 degrees), the assumed sweep angle
#   3*1860           : distance (m) towed in 1 h at 3 knots (approx. 1860 m per nm)
#   0.45388309675081 : standard swept area (km^2) of the reference TVL haul
hlcodL2 <- hlcodL2 %>% mutate(RS_x = 3/GroundSpeed,
                              Horizontal.opening..m. = Fishing.line*0.67,
                              Swep.one.side..after.formula...meter = 0.258819045*SweepLngt, # SIN(RADIANS(15))
                              Size.final..m = Horizontal.opening..m. + (Swep.one.side..after.formula...meter*2),
                              Swept.area = (Size.final..m*3*1860)/1000000,
                              RSA_x = 0.45388309675081/Swept.area)

hlfleL2 <- hlfleL2 %>% mutate(RS_x = 3/GroundSpeed,
                              Horizontal.opening..m. = Fishing.line*0.67,
                              Swep.one.side..after.formula...meter = 0.258819045*SweepLngt, # SIN(RADIANS(15))
                              Size.final..m = Horizontal.opening..m. + (Swep.one.side..after.formula...meter*2),
                              Swept.area = (Size.final..m*3*1860)/1000000,
                              RSA_x = 0.45388309675081/Swept.area)

# Check EQ. is correct by recalculating it in the sweep file
# (recompute RSA from the raw gear fields and compare against the RSA column
# shipped with the sweep file; points should fall on the 1:1 line)
sweep <- sweep %>% mutate(Horizontal.opening..m.2 = Fishing.line*0.67,
                          Swep.one.side..after.formula...meter2 = 0.258819045*SweepLngt, # SIN(RADIANS(15))
                          Size.final..m2 = Horizontal.opening..m.2 + (Swep.one.side..after.formula...meter2*2),
                          Swept.area2 = (Size.final..m2*3*1860)/1000000,
                          RSA_x = 0.45388309675081/Swept.area2)

sweep %>%
  drop_na() %>%
  ggplot(., aes(as.numeric(RSA), RSA_x)) + geom_point() + geom_abline(intercept = 0, slope = 1)


# Flag missing correction factors with the sentinel -1/3 (ICES codes missing
# input as -9, which would have propagated to -1/3 in the calculations above),
# so they are easy to filter out later
hlcodL2 <- hlcodL2 %>%
  mutate(across(c(RS_x, RS_sweep, RSA_x, RSA_sweep), ~replace_na(.x, -1/3)))

hlfleL2 <- hlfleL2 %>%
  mutate(across(c(RS_x, RS_sweep, RSA_x, RSA_sweep), ~replace_na(.x, -1/3)))

# Compare the different correction factors (calculated vs imported from sweep file)
p1 <- ggplot(filter(hlcodL2, RS_x > 0), aes(RS_x)) + geom_histogram() + xlim(0.4, 1.76)
p2 <- ggplot(hlcodL2, aes(RSA_x)) + geom_histogram()
p3 <- ggplot(hlcodL2, aes(RS_sweep)) + geom_histogram() + xlim(0.4, 1.76)
p4 <- ggplot(hlcodL2, aes(RSA_sweep)) + geom_histogram()

(p1 + p2) / (p3 + p4)
#> Warning: Removed 87 rows containing non-finite values (stat_bin).
#> Warning: Removed 2 rows containing missing values (geom_bar).
#> Warning: Removed 29697 rows containing non-finite values (stat_bin).
#> Warning: Removed 2 rows containing missing values (geom_bar).


# Same comparison for flounder
p5 <- ggplot(filter(hlfleL2, RS_x > 0), aes(RS_x)) + geom_histogram() + xlim(0.4, 1.76)
p6 <- ggplot(hlfleL2, aes(RSA_x)) + geom_histogram()
p7 <- ggplot(hlfleL2, aes(RS_sweep)) + geom_histogram() + xlim(0.4, 1.76)
p8 <- ggplot(hlfleL2, aes(RSA_sweep)) + geom_histogram()

(p5 + p6) / (p7 + p8)
#> Warning: Removed 34 rows containing non-finite values (stat_bin).

#> Warning: Removed 2 rows containing missing values (geom_bar).
#> Warning: Removed 15354 rows containing non-finite values (stat_bin).
#> Warning: Removed 2 rows containing missing values (geom_bar).


# Why do I have RSA values smaller than one? (either because sweep length is longer or gear is larger (GOV))
# Check if I can calculate the same RSA in sweep as that entered there.
# Ok, so the equation is correct. Which ID's have RSA < 1?
hlcodL2 %>% 
  filter(RSA_x < 1 & RSA_x > 0) %>%
  dplyr::select(Year, Country, Ship, Gear, haul.id, Horizontal.opening..m., Fishing.line,
                Swep.one.side..after.formula...meter, SweepLngt, Size.final..m, Swept.area, RSA_x) %>% 
  ggplot(., aes(RSA_x, fill = factor(SweepLngt))) + geom_histogram() + facet_wrap(~Gear, ncol = 1)


# Check if I have more than one unique RS or RSA value per haul, or if it's "either this or that"
# Filter positive in both columns
# (points on the red 1:1 line mean the two sources agree where they overlap)
hlcodL2 %>% filter(RS_x > 0 & RS_sweep > 0) %>% ggplot(., aes(RS_x, RS_sweep)) +
  geom_point() + geom_abline(aes(intercept = 0, slope = 1), color = "red")


hlcodL2 %>% filter(RSA_x > 0 & RSA_sweep > 0) %>% ggplot(., aes(RSA_x, RSA_sweep)) +
  geom_point() + geom_abline(aes(intercept = 0, slope = 1), color = "red")


hlfleL2 %>% filter(RS_x > 0 & RS_sweep > 0) %>% ggplot(., aes(RS_x, RS_sweep)) +
  geom_point() + geom_abline(aes(intercept = 0, slope = 1), color = "red")


hlfleL2 %>% filter(RSA_x > 0 & RSA_sweep > 0) %>% ggplot(., aes(RSA_x, RSA_sweep)) +
  geom_point() + geom_abline(aes(intercept = 0, slope = 1), color = "red")


# Ok, there's an odd RS_x that is larger than 3. It didn't catch anything and speed is 0.8! Will remove
# NOTE(review): the strict < 3 also removes hauls with RS_x exactly 3 (ground
# speed of exactly 1 knot) — presumably none exist, but confirm.
hlcodL2 <- hlcodL2 %>% filter(RS_x < 3)
hlfleL2 <- hlfleL2 %>% filter(RS_x < 3)

# Plot again
hlcodL2 %>% filter(RS_x > 0 & RS_sweep > 0) %>% ggplot(., aes(RS_x, RS_sweep)) +
  geom_point() + geom_abline(aes(intercept = 0, slope = 1), color = "red")


hlfleL2 %>% filter(RS_x > 0 & RS_sweep > 0) %>% ggplot(., aes(RS_x, RS_sweep)) +
  geom_point() + geom_abline(aes(intercept = 0, slope = 1), color = "red")


# They are the same when they overlap, so it does not matter which data source I use for RS and RSA values
# Make a single RS and RSA column: prefer the sweep-file value, fall back on
# the calculated one, and keep the -99 sentinel where neither is usable

# Cod 
hlcodL3 <- hlcodL2 %>%
  mutate(RS = -99,
         RS = ifelse(RS_sweep > 0, RS_sweep, RS),
         RS = ifelse(RS < 0 & RS_x > 0, RS_x, RS)) %>% # No NAs remain in RS_x (missing values were set to -1/3 above); anything still negative is filtered out below
  mutate(RSA = -99,
         RSA = ifelse(RSA_sweep > 0, RSA_sweep, RSA),
         RSA = ifelse(RSA < 0 & RSA_x > 0, RSA_x, RSA)) %>%
  filter(RS > 0) %>%
  filter(RSA > 0) %>% 
  mutate(RSRSA = RS*RSA)

# Plot the combined correction factor
ggplot(hlcodL3, aes(RSRSA)) + geom_histogram()


# Which years have Latvian data before the standardization filtering?
hlfleL2 %>% filter(Country == "LAT") %>% distinct(Year) %>% arrange(Year)
#> # A tibble: 22 x 1
#>     Year
#>    <int>
#>  1  1997
#>  2  1999
#>  3  2000
#>  4  2001
#>  5  2002
#>  6  2003
#>  7  2004
#>  8  2005
#>  9  2006
#> 10  2007
#> # … with 12 more rows

# Flounder 
# Made consistent with the cod block above: same -99 sentinel and the same
# "only overwrite with a positive RS_x/RSA_x" guard. Functionally equivalent
# to the previous -999 version, since any row still negative after both steps
# is removed by the positive filters below.
hlfleL3 <- hlfleL2 %>%
  mutate(RS = -99,
         RS = ifelse(RS_sweep > 0, RS_sweep, RS),
         RS = ifelse(RS < 0 & RS_x > 0, RS_x, RS)) %>%
  mutate(RSA = -99,
         RSA = ifelse(RSA_sweep > 0, RSA_sweep, RSA),
         RSA = ifelse(RSA < 0 & RSA_x > 0, RSA_x, RSA)) %>%
  filter(RS > 0) %>%
  filter(RSA > 0) %>% 
  mutate(RSRSA = RS*RSA)

# Test how many years of LAT data I miss out on because I can't standardize it.
# hlfleL2 %>%
#   mutate(RS = -999,
#          RS = ifelse(RS_sweep > 0, RS_sweep, RS),
#          RS = ifelse(RS < 0, RS_x, RS)) %>% # Note that there are no NA i RS_x. This replaces all non-RS_sweep values -0.3, so I can filter positive later
#   filter(RS > 0) %>% 
#   filter(Country == "LAT") %>% 
#   distinct(Year) %>% 
#   arrange(Year)
#   
# hlfleL2 %>%
#   mutate(RSA = -999,
#          RSA = ifelse(RSA_sweep > 0, RSA_sweep, RSA),
#          RSA = ifelse(RSA < 0, RSA_x, RSA)) %>% 
#   filter(RSA > 0) %>% 
#   filter(Country == "LAT") %>% 
#   distinct(Year) %>% 
#   arrange(Year)

# Plot
# FIX: this re-plotted hlcodL3 (already shown above); at this point in the
# flounder section it is hlfleL3 that should be inspected
ggplot(hlfleL3, aes(RSRSA)) + geom_histogram()


# Standardize! Multiply the unstandardized CPUEs by the combined correction
# factor (RSRSA = RS * RSA, computed above)
hlcodL3 <- hlcodL3 %>%
  mutate(CPUEst_kg = CPUEun_kg*RSRSA,
         CPUEst = CPUEun*RSRSA)

hlfleL3 <- hlfleL3 %>%
  mutate(CPUEst_kg = CPUEun_kg*RSRSA,
         CPUEst = CPUEun*RSRSA)
  
# Check the standardized CPUEs: no NAs expected for cod, minima should be 0
unique(is.na(hlcodL3$CPUEst_kg))
#> [1] FALSE
unique(is.na(hlcodL3$CPUEst))
#> [1] FALSE
min(hlcodL3$CPUEst_kg)
#> [1] 0
min(hlcodL3$CPUEst)
#> [1] 0

unique(is.na(hlfleL3$CPUEst_kg)) # Remove the few NA's here
#> [1]  TRUE FALSE
hlfleL3 <- hlfleL3 %>% drop_na(CPUEst_kg)
unique(is.na(hlfleL3$CPUEst))
#> [1] FALSE
min(hlfleL3$CPUEst_kg) 
#> [1] 0
min(hlfleL3$CPUEst)
#> [1] 0

# Now calculate total CPUE per haul in Orios unit, then create the new unit, i.e.:
# convert from kg of fish caught by trawling for 1 h a standard bottom swept area of
# 0.45km2 (using a TVL trawl with 75 m sweeps at the standard speed of three knots) to....
# kg of fish per km^2 by dividing with 0.45

# mutate() + distinct() sums the length-class rows into haul totals while
# keeping the first row's other columns — length-specific columns (e.g.
# LngtClass) in the result are therefore arbitrary and should not be used
hlcodhaul <- hlcodL3 %>%
  group_by(haul.id) %>% 
  mutate(haul_cpue_kg = sum(CPUEst_kg),
         haul_cpue = sum(CPUEst),
         haul_cpue_kg_un = sum(CPUEun_kg),
         haul_cpue_un = sum(CPUEun),
         density = haul_cpue_kg/0.45,
         density_ab = haul_cpue/0.45) %>% 
  ungroup() %>% 
  distinct(haul.id, .keep_all = TRUE)

# t <- hlcodhaul %>% filter(haul_cpue_un == 0)
# t <- hlcodhaul %>% filter(!Country == "SWE") %>% filter(haul_cpue_un > 0)

# Same haul-level aggregation for flounder
hlflehaul <- hlfleL3 %>%
  group_by(haul.id) %>% 
  mutate(haul_cpue_kg = sum(CPUEst_kg),
         haul_cpue = sum(CPUEst),
         haul_cpue_kg_un = sum(CPUEun_kg),
         haul_cpue_un = sum(CPUEun),
         density = haul_cpue_kg/0.45,
         density_ab = haul_cpue/0.45) %>% 
  ungroup() %>% 
  distinct(haul.id, .keep_all = TRUE)

# Rename to modelling conventions and keep only the columns needed downstream
dat <- hlcodhaul %>%
  rename(year = Year,
         lat = ShootLat,
         lon = ShootLong,
         quarter = Quarter,
         ices_rect = Rect) %>%
  dplyr::select(density, year, lat, lon, quarter, haul.id, IDx, ices_rect, SD)

# Attach the flounder density to the cod hauls by haul ID; cod hauls without
# a matching flounder record are dropped
hlflehaul_select <- hlflehaul %>%
  rename(density_fle = density) %>%
  dplyr::select(density_fle, haul.id)

dat <- dat %>%
  left_join(hlflehaul_select, by = "haul.id") %>%
  drop_na(density_fle)

# Eyeball the 50 largest flounder densities (screening for outliers)
dat %>% arrange(desc(density_fle)) %>% data.frame() %>% head(50)
#>        density year     lat     lon quarter                     haul.id
#> 1    75.853482 2010 57.3617 21.2350       4     2010:4:LAT:BALL:TVL:2:6
#> 2    94.385161 2007 57.2124 18.8491       4   2007:4:SWE:ARG:TVL:689:14
#> 3   265.278057 2012 57.4600 21.2450       4     2012:4:LAT:BALL:TVL:2:6
#> 4   145.764831 2019 54.4500 19.3283       4  2019:4:POL:BAL:TVL:26256:4
#> 5    36.249608 2009 57.8994 19.4332       4   2009:4:SWE:ARG:TVL:723:11
#> 6   109.185255 2012 57.3683 21.2533       4     2012:4:LAT:BALL:TVL:2:7
#> 7  1843.781093 2001 56.4709 18.6149       4   2001:4:DEN:DAN2:TVL:93:38
#> 8   868.595807 2019 55.4627 14.5290       4    2019:4:SWE:77SE:TVL:66:2
#> 9   177.603041 2008 57.1766 18.8081       4   2008:4:SWE:ARG:TVL:697:12
#> 10  468.959595 2011 57.2433 21.0817       4    2011:4:LAT:BALL:TVL:2:11
#> 11  312.027608 2008 57.4733 21.1167       4     2008:4:LAT:BALL:TVL:2:3
#> 12    9.468183 2013 57.7022 19.1408       4   2013:4:SWE:DAN2:TVL:33:17
#> 13 1614.959782 2007 57.2033 20.7167       4     2007:4:LAT:BALL:TVL:2:9
#> 14  266.250565 2013 57.8742 19.4317       4   2013:4:SWE:DAN2:TVL:30:15
#> 15   63.431536 2008 57.1973 18.8309       4   2008:4:SWE:ARG:TVL:710:16
#> 16  181.205759 2017 57.3583 21.2600       4     2017:4:LAT:BALL:TVL:2:2
#> 17  366.600070 2008 57.1658 18.8226       4   2008:4:SWE:ARG:TVL:696:11
#> 18 1410.470804 2002 56.0901 20.5504       4   2002:4:DEN:DAN2:TVL:29:14
#> 19  491.714968 2007 56.6283 20.5817       4    2007:4:LAT:BALL:TVL:2:15
#> 20   67.401173 2011 55.1000 20.1083       4     2011:4:RUS:ATL:TVL:62:2
#> 21  248.422144 2007 57.1698 18.8370       4   2007:4:SWE:ARG:TVL:687:12
#> 22  226.040831 2017 54.5167 18.9117       4 2017:4:POL:BAL:TVL:26280:41
#> 23  312.968144 2006 56.6850 20.7667       4    2006:4:LAT:BALL:TVL:2:14
#> 24  177.801499 2010 57.3233 21.0750       4     2010:4:LAT:BALL:TVL:2:5
#> 25  145.904915 2011 57.0200 20.9850       4    2011:4:LAT:BALL:TVL:2:12
#> 26  257.081364 2018 54.4150 19.3050       4 2018:4:POL:BAL:TVL:26163:11
#> 27   11.547042 2004 57.8733 19.4150       4    2004:4:SWE:ARG:TVL:726:5
#> 28   26.529045 2000 57.1790 18.8753       4   2000:4:SWE:ARG:GOV:606:38
#> 29 1319.885682 2008 55.8033 20.0700       4    2008:4:LAT:BALL:TVL:2:26
#> 30   93.681053 2006 57.2300 21.1567       4     2006:4:LAT:BALL:TVL:2:9
#> 31 1237.927169 2007 57.0367 20.7783       4    2007:4:LAT:BALL:TVL:2:11
#> 32  471.672180 2008 54.3950 18.9850       4 2008:4:POL:BAL:TVL:26219:20
#> 33  416.405620 2006 56.6650 20.7817       4    2006:4:LAT:BALL:TVL:2:15
#> 34  506.792401 2008 56.9983 20.7650       4    2008:4:LAT:BALL:TVL:2:12
#> 35  366.748811 2001 57.3633 16.9213       4    2001:4:SWE:ARG:TVL:597:9
#> 36  317.643716 2007 56.6233 20.5583       4    2007:4:LAT:BALL:TVL:2:16
#> 37  149.904232 2011 57.0433 21.0300       4    2011:4:LAT:BALL:TVL:2:13
#> 38 2886.907131 2008 55.1000 20.1500       4   2008:4:RUS:ATLD:TVL:51:32
#> 39   26.422042 2001 57.8727 19.4198       4    2001:4:SWE:ARG:TVL:585:3
#> 40  759.016698 2015 55.6842 14.5025       4     2015:4:SWE:DAN2:TVL:4:2
#> 41  446.869160 2011 56.6350 20.7333       4     2011:4:LAT:BALL:TVL:2:7
#> 42  133.605963 2009 57.4180 17.0216       4    2009:4:SWE:ARG:TVL:714:2
#> 43  234.058199 1999 54.7667 13.6667       4    1999:4:GFR:SOL:H20:37:32
#> 44  719.829357 2011 56.6100 20.6367       4     2011:4:LAT:BALL:TVL:2:6
#> 45  186.121205 2017 57.3617 21.2550       4     2017:4:LAT:BALL:TVL:2:1
#> 46  104.111882 2018 54.4033 19.2700       4 2018:4:POL:BAL:TVL:26216:12
#> 47   50.550152 2015 57.4283 21.2833       4     2015:4:LAT:BALL:TVL:2:8
#> 48   92.484410 2008 57.7783 21.4117       4     2008:4:LAT:BALL:TVL:2:2
#> 49  234.776192 2017 57.2183 21.1217       4     2017:4:LAT:BALL:TVL:2:3
#> 50   45.626984 1995 57.1647 18.8183       4   1995:4:SWE:ARG:FOT:256:11
#>                             IDx ices_rect SD density_fle
#> 1       2010.4.LAT.67BC.TVL.2.6      43H1 28   6723.2629
#> 2    2007.4.SWE.77AR.TVL.689.14      43G8 28   3251.8846
#> 3       2012.4.LAT.67BC.TVL.2.6      43H1 28   2744.7969
#> 4   2019.4.POL.67BC.TVL.26256.4      37G9 26   2566.4037
#> 5    2009.4.SWE.77AR.TVL.723.11      44G9 28   2518.0079
#> 6       2012.4.LAT.67BC.TVL.2.7      43H1 28   2483.0721
#> 7     2001.4.DEN.26D4.TVL.93.38      41G8 26   2154.9624
#> 8      2019.4.SWE.77SE.TVL.66.2      39G4 24   2085.8337
#> 9    2008.4.SWE.77AR.TVL.697.12      43G8 28   2067.7463
#> 10     2011.4.LAT.67BC.TVL.2.11      43H1 28   2058.3928
#> 11      2008.4.LAT.67BC.TVL.2.3      43H1 28   1910.8130
#> 12    2013.4.SWE.26D4.TVL.33.17      44G9 28   1738.9815
#> 13      2007.4.LAT.67BC.TVL.2.9      43H0 28   1717.2111
#> 14    2013.4.SWE.26D4.TVL.30.15      44G9 28   1694.3936
#> 15   2008.4.SWE.77AR.TVL.710.16      43G8 28   1654.8391
#> 16      2017.4.LAT.67BC.TVL.2.2      43H1 28   1600.2680
#> 17   2008.4.SWE.77AR.TVL.696.11      43G8 28   1584.2860
#> 18    2002.4.DEN.26D4.TVL.29.14      41H0 26   1583.0442
#> 19     2007.4.LAT.67BC.TVL.2.15      42H0 28   1576.0516
#> 20     2011.4.RUS.RUNT.TVL.62.2      39H0 26   1567.9321
#> 21   2007.4.SWE.77AR.TVL.687.12      43G8 28   1487.4590
#> 22 2017.4.POL.67BC.TVL.26280.41      38G8 26   1465.8711
#> 23     2006.4.LAT.67BC.TVL.2.14      42H0 28   1357.7216
#> 24      2010.4.LAT.67BC.TVL.2.5      43H1 28   1292.5683
#> 25     2011.4.LAT.67BC.TVL.2.12      43H0 28   1290.7944
#> 26 2018.4.POL.67BC.TVL.26163.11      37G9 26   1273.8596
#> 27    2004.4.SWE.77AR.TVL.726.5      44G9 28   1224.8131
#> 28   2000.4.SWE.77AR.GOV.606.38      43G8 28   1209.4399
#> 29     2008.4.LAT.67BC.TVL.2.26      40H0 26   1184.3045
#> 30      2006.4.LAT.67BC.TVL.2.9      43H1 28   1173.8761
#> 31     2007.4.LAT.67BC.TVL.2.11      43H0 28   1170.8015
#> 32 2008.4.POL.67BC.TVL.26219.20      37G8 26   1156.1423
#> 33     2006.4.LAT.67BC.TVL.2.15      42H0 28   1143.9646
#> 34     2008.4.LAT.67BC.TVL.2.12      42H0 28   1136.8738
#> 35    2001.4.SWE.77AR.TVL.597.9      43G6 27   1115.8434
#> 36     2007.4.LAT.67BC.TVL.2.16      42H0 28   1083.7434
#> 37     2011.4.LAT.67BC.TVL.2.13      43H1 28   1071.6831
#> 38    2008.4.RUS.RUJB.TVL.51.32      39H0 26   1070.0434
#> 39    2001.4.SWE.77AR.TVL.585.3      44G9 28   1068.8500
#> 40      2015.4.SWE.26D4.TVL.4.2      40G4 25   1068.2623
#> 41      2011.4.LAT.67BC.TVL.2.7      42H0 28   1041.5374
#> 42    2009.4.SWE.77AR.TVL.714.2      43G7 27   1028.1830
#> 43    1999.4.GFR.06S1.H20.37.32      38G3 24   1016.2832
#> 44      2011.4.LAT.67BC.TVL.2.6      42H0 28   1009.6423
#> 45      2017.4.LAT.67BC.TVL.2.1      43H1 28   1004.9214
#> 46 2018.4.POL.67BC.TVL.26216.12      37G9 26    991.3600
#> 47      2015.4.LAT.67BC.TVL.2.8      43H1 28    985.0383
#> 48      2008.4.LAT.67BC.TVL.2.2      44H1 28    984.6904
#> 49      2017.4.LAT.67BC.TVL.2.3      43H1 28    956.4140
#> 50   1995.4.SWE.77AR.FOT.256.11      43G8 28    909.9229

# Drop hauls with unrealistically large flounder densities
# (note: filter() also drops rows where density_fle is NA, if any exist)
dat <- filter(dat, density_fle < 10000)

# Quick look at cod vs flounder density per haul
ggplot(dat, aes(x = density, y = density_fle)) + geom_point()

Compare cod data with Orio et al (2017)

# Read Orio data first for comparison. Drop Swedish hauls from my data to
# match the comparison subset.
test_cod <- hlcodhaul %>% filter(Country != "SWE")
test_fle <- hlflehaul %>% filter(Country != "SWE")

# NOTE(review): absolute, user-specific paths — these only run on the
# original author's machine; consider moving the files under data/
orio_cod <- read.csv("/Users/maxlindmark/Desktop/R_STUDIO_PROJECTS/ale_gear_standardization/datras_st_ale.csv")
orio_fle <- read.csv("/Users/maxlindmark/Desktop/R_STUDIO_PROJECTS/ale_gear_standardization/datras_fle_st_ale.csv")

# Sum the per-row CPUE values up to haul level (one row per haul ID, IDX),
# for standardized (st) and unstandardized (un) abundance and biomass.
# The cod and flounder data need exactly the same aggregation, so use a
# single helper instead of two duplicated pipelines.
sum_cpue_by_haul <- function(df) {
  df %>%
    group_by(IDX) %>%
    mutate(haul_cpue_kg = sum(CPUEstBIOyear),
           haul_cpue = sum(CPUEst),
           haul_cpue_kg_un = sum(CPUEunBIOyear),
           haul_cpue_un = sum(CPUEun)) %>%
    ungroup() %>%
    # mutate() + distinct(.keep_all) (rather than summarise) keeps all other
    # columns from the first row of each haul
    distinct(IDX, .keep_all = TRUE)
}

orio_cod <- sum_cpue_by_haul(orio_cod)

orio_fle <- sum_cpue_by_haul(orio_fle)

# hlcodhaul %>% group_by(IDx) %>% mutate(n = n()) %>% ungroup() %>% distinct(n)

# Standardize data for easier plotting. The cod and flounder pipelines were
# identical line-for-line apart from the species label, so they are factored
# into two helpers (one per data source); the pipeline bodies are unchanged.

# Tidy the "Max" (this script's) haul-level data: keep the CPUE columns and
# haul metadata, label source/species, and restrict to 1993-2016.
prep_max <- function(df, sp) {
  df %>%
    dplyr::select(haul_cpue_kg, haul_cpue, haul_cpue_kg_un, haul_cpue_un, Year, Ship3, Country, Gear) %>%
    mutate(source = "Max",
           species = sp) %>%
    rename("Ship" = "Ship3") %>%
    filter(Year > 1992 & Year < 2017)
}

test_cod <- prep_max(test_cod, "Cod")
test_fle <- prep_max(test_fle, "Fle")

# Tidy the "Ale" (Orio et al.) data: parse Year/Quarter/Country/Gear out of
# the IDX string by character position, keep Q4 hauls from 1993 onwards,
# divide biomass CPUE by 1000 (unit harmonization with the Max data —
# presumably g to kg; confirm), and keep only countries in `keep_countries`.
prep_ale <- function(df, sp, keep_countries) {
  df %>%
    dplyr::select(haul_cpue_kg, haul_cpue, haul_cpue_kg_un, haul_cpue_un, year, vessel, IDX) %>%
    mutate(source = "Ale",
           species = sp) %>%
    rename("Year" = "year",
           "Ship" = "vessel") %>%
    mutate(IDX2 = IDX,
           IDX3 = IDX) %>%
    separate(IDX, sep = c(5:7), into = c("temp_year", "Quarter")) %>%
    separate(temp_year, sep = c(4:5), into = c("Year", "scrap")) %>%
    separate(IDX2, sep = 7, into = c("scrap2", "Country")) %>%
    separate(Country, sep = 3, into = c("Country", "scrap3")) %>%
    separate(IDX3, sep = 15, into = c("scrap4", "Gear")) %>%
    separate(Gear, sep = 3, into = c("Gear", "scrap5")) %>%
    dplyr::select(-scrap, -scrap2, -scrap3, -scrap4, -scrap5) %>%
    filter(Quarter == 4) %>%
    mutate(Year = as.integer(Year)) %>%
    mutate(haul_cpue_kg = haul_cpue_kg/1000,
           haul_cpue_kg_un = haul_cpue_kg_un/1000) %>%
    filter(Year > 1992) %>%
    filter(Country %in% keep_countries)
}

# NB: both species are filtered to the countries present in the processed
# cod data, exactly as in the original code.
orio_cod <- prep_ale(orio_cod, "Cod", unique(test_cod$Country))
orio_fle <- prep_ale(orio_fle, "Fle", unique(test_cod$Country))

# sort(unique(test_cod$Country))
# sort(unique(orio_cod$Country))

# Check proportions of zero catches. The previous code assigned each filtered
# subset to `t` (masking base::t()) and immediately overwrote/discarded it;
# report the share of hauls with positive biomass per dataset instead.
prop_positive <- c(
  test_cod = mean(test_cod$haul_cpue_kg > 0),
  orio_cod = mean(orio_cod$haul_cpue_kg > 0),
  test_fle = mean(test_fle$haul_cpue_kg > 0),
  orio_fle = mean(orio_fle$haul_cpue_kg > 0)
)
prop_positive

# Stack both sources and both species for joint plotting
test_full <- bind_rows(orio_fle, orio_cod, test_cod, test_fle)

# Check the non-standardized data for cod
# Raw abundance cpue: one point per haul, dodged by data source
test_full %>%
  filter(species == "Cod") %>%
  ggplot(aes(x = factor(Year), y = haul_cpue_un, color = source)) +
  geom_point(size = 1, position = position_dodge(width = 0.5)) +
  theme(axis.text.x = element_text(angle = 90))

# Distribution of raw abundance cpue, per year
test_full %>%
  filter(species == "Cod") %>%
  ggplot(aes(x = haul_cpue_un, fill = source)) +
  geom_histogram(position = position_dodge()) +
  facet_wrap(~Year, scale = "free") +
  theme(axis.text.x = element_text(angle = 90))

# Mean abundance cpue
test_full %>%
  filter(species == "Cod") %>%
  group_by(Year, source) %>%
  summarise(mean_cpue = mean(haul_cpue_un),
            sd_cpue = sd(haul_cpue_un)) %>%
  ggplot(aes(x = Year, y = mean_cpue, linetype = source, color = source)) +
  geom_line(size = 1.1) +
  geom_point(size = 3)

# Mean abundance cpue by country
test_full %>%
  filter(species == "Cod") %>%
  group_by(Year, source, Country) %>%
  summarise(mean_cpue = mean(haul_cpue_un),
            sd_cpue = sd(haul_cpue_un)) %>%
  ggplot(aes(x = Year, y = mean_cpue, linetype = source, color = source)) +
  geom_line(size = 1.1) +
  facet_wrap(~ Country)

# Mean abundance cpue for non-zero catches
test_full %>%
  filter(species == "Cod") %>%
  filter(haul_cpue_un > 0) %>%
  group_by(Year, source) %>%
  summarise(mean_cpue = mean(haul_cpue_un),
            sd_cpue = sd(haul_cpue_un)) %>%
  ggplot(aes(x = Year, y = mean_cpue, linetype = source, color = source)) +
  geom_line(size = 1.1) +
  geom_point(size = 3)


# Now plot the gear-corrected biomass cpue for cod, per haul
test_full %>%
  filter(species == "Cod") %>%
  ggplot(aes(x = factor(Year), y = haul_cpue_kg, color = source)) +
  geom_point(size = 1, position = position_dodge(width = 0.5)) +
  theme(axis.text.x = element_text(angle = 90))

# Mean corrected biomass cpue
test_full %>%
  filter(species == "Cod") %>%
  group_by(Year, source) %>%
  summarise(mean_cpue = mean(haul_cpue_kg),
            sd_cpue = sd(haul_cpue_kg)) %>%
  ggplot(aes(x = Year, y = mean_cpue, linetype = source, color = source)) +
  geom_line(size = 1.1) +
  geom_point(size = 3)



# Now check in on flounder
# Raw abundance cpue: one point per haul, dodged by data source
test_full %>%
  filter(species == "Fle") %>%
  ggplot(aes(x = factor(Year), y = haul_cpue_un, color = source)) +
  geom_point(size = 1, position = position_dodge(width = 0.5)) +
  theme(axis.text.x = element_text(angle = 90))

# Distribution of raw abundance cpue, per year
test_full %>%
  filter(species == "Fle") %>%
  ggplot(aes(x = haul_cpue_un, fill = source)) +
  geom_histogram(position = position_dodge()) +
  facet_wrap(~Year, scale = "free") +
  theme(axis.text.x = element_text(angle = 90))

# Mean abundance cpue
test_full %>%
  filter(species == "Fle") %>%
  group_by(Year, source) %>%
  summarise(mean_cpue = mean(haul_cpue_un),
            sd_cpue = sd(haul_cpue_un)) %>%
  ggplot(aes(x = Year, y = mean_cpue, linetype = source, color = source)) +
  geom_line(size = 1.1) +
  geom_point(size = 3)

# Ok, here we can see that Ale predicts an increase earlier, which we also saw
# on the plot of raw catches as points
# Mean abundance cpue by country to see how this can arise
test_full %>%
  filter(species == "Fle") %>%
  group_by(Year, source, Country) %>%
  summarise(mean_cpue = mean(haul_cpue_un),
            sd_cpue = sd(haul_cpue_un)) %>%
  ggplot(aes(x = Year, y = mean_cpue, linetype = source, color = source)) +
  geom_line(size = 1.1) +
  facet_wrap(~ Country)

# Ok, after checking, the reason I don't have the older LAT data is because I don't
# have RS or RSA-values for those hauls, even though the catch data are in datras...
# Should probably check with Ale how he corrected those!

# Now plot the gear-corrected biomass cpue, per haul
test_full %>%
  filter(species == "Fle") %>%
  ggplot(aes(x = factor(Year), y = haul_cpue_kg, color = source)) +
  geom_point(size = 1, position = position_dodge(width = 0.5)) +
  theme(axis.text.x = element_text(angle = 90))

# Mean corrected biomass cpue
test_full %>%
  filter(species == "Fle") %>%
  group_by(Year, source) %>%
  summarise(mean_cpue = mean(haul_cpue_kg),
            sd_cpue = sd(haul_cpue_kg)) %>%
  ggplot(aes(x = Year, y = mean_cpue, linetype = source, color = source)) +
  geom_line(size = 1.1) +
  geom_point(size = 3)


# Then check length-weight relationships again

Add in the environmental variables

Depth

# Read the two depth GeoTIFFs (western and eastern halves of the study area)
west <- raster("data/depth_geo_tif/D5_2018_rgb-1.tif")
#plot(west)

east <- raster("data/depth_geo_tif/D6_2018_rgb-1.tif")
# plot(east)

# Merge into a single raster covering the whole area
dep_rast <- raster::merge(west, east)

# Extract the raster value at each haul position. Select the coordinate
# columns by name (lon, lat) instead of the previous positional dat[, 4:3],
# which silently breaks if the column order ever changes.
dat$depth <- extract(dep_rast, dat[, c("lon", "lat")])

# Convert to depth (instead of elevation)
ggplot(dat, aes(depth)) + geom_histogram()

# Reference against the maximum observed value and flip the sign. Use
# max(..., na.rm = TRUE) instead of the previous max(drop_na(dat)$depth):
# drop_na(dat) would also exclude rows that are NA in *other* columns,
# which could shift the reference value (tidylog showed "no rows removed"
# here, so the result is identical for this data).
dat$depth <- (dat$depth - max(dat$depth, na.rm = TRUE)) * -1
ggplot(dat, aes(depth)) + geom_histogram()

Oxygen

# Downloaded from here: https://resources.marine.copernicus.eu/?option=com_csw&view=details&product_id=BALTICSEA_REANALYSIS_BIO_003_012
# Extract raster points: https://gisday.wordpress.com/2014/03/24/extract-raster-values-from-points-using-r/comment-page-1/
# https://rpubs.com/boyerag/297592
# https://pjbartlein.github.io/REarthSysSci/netCDF.html#get-a-variable
# Open the netCDF file (monthly mean bottom dissolved oxygen, NEMO-SCOBI reanalysis)
ncin <- nc_open("data/NEMO_Nordic_SCOBI/dataset-reanalysis-scobi-monthlymeans_1610091357600.nc")

# Inspect file structure: variables, dimensions and attributes (output below)
print(ncin)
#> File data/NEMO_Nordic_SCOBI/dataset-reanalysis-scobi-monthlymeans_1610091357600.nc (NC_FORMAT_CLASSIC):
#> 
#>      1 variables (excluding dimension variables):
#>         float o2b[longitude,latitude,time]   
#>             long_name: Sea_floor_Dissolved_Oxygen_Concentration
#>             missing_value: NaN
#>             standard_name: mole_concentration_of_dissolved_molecular_oxygen_in_sea_water
#>             units: mmol m-3
#>             _FillValue: NaN
#>             _ChunkSizes: 1
#>              _ChunkSizes: 523
#>              _ChunkSizes: 383
#> 
#>      3 dimensions:
#>         time  Size:324
#>             axis: T
#>             long_name: Validity time
#>             standard_name: time
#>             units: days since 1950-01-01 00:00:00
#>             calendar: gregorian
#>             _ChunkSizes: 512
#>             _CoordinateAxisType: Time
#>             valid_min: 15721.5
#>             valid_max: 25551.5
#>         latitude  Size:166
#>             axis: Y
#>             standard_name: latitude
#>             long_name: latitude
#>             units: degrees_north
#>             _CoordinateAxisType: Lat
#>             valid_min: 52.991626739502
#>             valid_max: 58.4915390014648
#>         longitude  Size:253
#>             standard_name: longitude
#>             long_name: longitude
#>             units: degrees_east
#>             axis: X
#>             _CoordinateAxisType: Lon
#>             valid_min: 9.01375484466553
#>             valid_max: 23.013614654541
#> 
#>     24 global attributes:
#>         references: http://www.smhi.se
#>         institution: Swedish Meterological and Hydrological Institute
#>         history: See source and creation_date attributees
#>         Conventions: CF-1.5
#>         contact: servicedesk_cmems@mercator-ocean.eu
#>         comment: Provided by SMHI as a Copernicus Marine Environment Monitoring Service production unit
#>         bullentin_type: reanalysis
#>         cmems_product_id: BALTICSEA_REANALYSIS_BIO_003_012
#>         title: CMEMS V4 Reanalysis: SCOBI model 3D fields (monthly means)
#>         FROM_ORIGINAL_FILE__easternmost_longitude: 30.2357654571533
#>         FROM_ORIGINAL_FILE__northernmost_latitude: 65.8914184570312
#>         FROM_ORIGINAL_FILE__westernmost_longitude: 9.01375484466553
#>         FROM_ORIGINAL_FILE__southernmost_latitude: 48.49169921875
#>         shallowest_depth: 1.50136542320251
#>         deepest_depth: 711.059204101562
#>         source: SMHI reanalysis run NORDIC-NS2_1d_20191201_20191202
#>         file_quality_index: 1
#>         creation_date: 2020-11-16 UTC
#>         bullentin_date: 20191201
#>         start_date: 2019-12-01 UTC
#>         stop_date: 2019-12-01 UTC
#>         start_time: 00:00 UTC
#>         stop_time: 00:00 UTC
#>         _CoordSysBuilder: ucar.nc2.dataset.conv.CF1Convention

# Get longitude and latitude (1-D vectors defining the model grid, degrees)
lon <- ncvar_get(ncin,"longitude")
nlon <- dim(lon)
head(lon)
#> [1] 9.013755 9.069310 9.124865 9.180420 9.235975 9.291530

lat <- ncvar_get(ncin,"latitude")
nlat <- dim(lat)
head(lat)
#> [1] 52.99163 53.02496 53.05829 53.09163 53.12496 53.15829

# Get time: days since 1950-01-01 (see the time "units" attribute printed above)
time <- ncvar_get(ncin,"time")
time
#>   [1] 15721.5 15751.0 15780.5 15811.0 15841.5 15872.0 15902.5 15933.5 15964.0
#>  [10] 15994.5 16025.0 16055.5 16086.5 16116.0 16145.5 16176.0 16206.5 16237.0
#>  [19] 16267.5 16298.5 16329.0 16359.5 16390.0 16420.5 16451.5 16481.0 16510.5
#>  [28] 16541.0 16571.5 16602.0 16632.5 16663.5 16694.0 16724.5 16755.0 16785.5
#>  [37] 16816.5 16846.5 16876.5 16907.0 16937.5 16968.0 16998.5 17029.5 17060.0
#>  [46] 17090.5 17121.0 17151.5 17182.5 17212.0 17241.5 17272.0 17302.5 17333.0
#>  [55] 17363.5 17394.5 17425.0 17455.5 17486.0 17516.5 17547.5 17577.0 17606.5
#>  [64] 17637.0 17667.5 17698.0 17728.5 17759.5 17790.0 17820.5 17851.0 17881.5
#>  [73] 17912.5 17942.0 17971.5 18002.0 18032.5 18063.0 18093.5 18124.5 18155.0
#>  [82] 18185.5 18216.0 18246.5 18277.5 18307.5 18337.5 18368.0 18398.5 18429.0
#>  [91] 18459.5 18490.5 18521.0 18551.5 18582.0 18612.5 18643.5 18673.0 18702.5
#> [100] 18733.0 18763.5 18794.0 18824.5 18855.5 18886.0 18916.5 18947.0 18977.5
#> [109] 19008.5 19038.0 19067.5 19098.0 19128.5 19159.0 19189.5 19220.5 19251.0
#> [118] 19281.5 19312.0 19342.5 19373.5 19403.0 19432.5 19463.0 19493.5 19524.0
#> [127] 19554.5 19585.5 19616.0 19646.5 19677.0 19707.5 19738.5 19768.5 19798.5
#> [136] 19829.0 19859.5 19890.0 19920.5 19951.5 19982.0 20012.5 20043.0 20073.5
#> [145] 20104.5 20134.0 20163.5 20194.0 20224.5 20255.0 20285.5 20316.5 20347.0
#> [154] 20377.5 20408.0 20438.5 20469.5 20499.0 20528.5 20559.0 20589.5 20620.0
#> [163] 20650.5 20681.5 20712.0 20742.5 20773.0 20803.5 20834.5 20864.0 20893.5
#> [172] 20924.0 20954.5 20985.0 21015.5 21046.5 21077.0 21107.5 21138.0 21168.5
#> [181] 21199.5 21229.5 21259.5 21290.0 21320.5 21351.0 21381.5 21412.5 21443.0
#> [190] 21473.5 21504.0 21534.5 21565.5 21595.0 21624.5 21655.0 21685.5 21716.0
#> [199] 21746.5 21777.5 21808.0 21838.5 21869.0 21899.5 21930.5 21960.0 21989.5
#> [208] 22020.0 22050.5 22081.0 22111.5 22142.5 22173.0 22203.5 22234.0 22264.5
#> [217] 22295.5 22325.0 22354.5 22385.0 22415.5 22446.0 22476.5 22507.5 22538.0
#> [226] 22568.5 22599.0 22629.5 22660.5 22690.5 22720.5 22751.0 22781.5 22812.0
#> [235] 22842.5 22873.5 22904.0 22934.5 22965.0 22995.5 23026.5 23056.0 23085.5
#> [244] 23116.0 23146.5 23177.0 23207.5 23238.5 23269.0 23299.5 23330.0 23360.5
#> [253] 23391.5 23421.0 23450.5 23481.0 23511.5 23542.0 23572.5 23603.5 23634.0
#> [262] 23664.5 23695.0 23725.5 23756.5 23786.0 23815.5 23846.0 23876.5 23907.0
#> [271] 23937.5 23968.5 23999.0 24029.5 24060.0 24090.5 24121.5 24151.5 24181.5
#> [280] 24212.0 24242.5 24273.0 24303.5 24334.5 24365.0 24395.5 24426.0 24456.5
#> [289] 24487.5 24517.0 24546.5 24577.0 24607.5 24638.0 24668.5 24699.5 24730.0
#> [298] 24760.5 24791.0 24821.5 24852.5 24882.0 24911.5 24942.0 24972.5 25003.0
#> [307] 25033.5 25064.5 25095.0 25125.5 25156.0 25186.5 25217.5 25247.0 25276.5
#> [316] 25307.0 25337.5 25368.0 25398.5 25429.5 25460.0 25490.5 25521.0 25551.5

# Time units attribute ("days since 1950-01-01 00:00:00") and number of time steps
tunits <- ncatt_get(ncin,"time","units")
nt <- dim(time)
nt
#> [1] 324
tunits
#> $hasatt
#> [1] TRUE
#> 
#> $value
#> [1] "days since 1950-01-01 00:00:00"

# Get oxygen: o2b = bottom dissolved oxygen, mmol m-3, dims [lon, lat, time]
# (see the variable listing in the print(ncin) output above)
dname <- "o2b"

oxy_array <- ncvar_get(ncin,dname)
dlname <- ncatt_get(ncin,dname,"long_name")
dunits <- ncatt_get(ncin,dname,"units")
# fill value used below to replace missing cells with NA
fillvalue <- ncatt_get(ncin,dname,"_FillValue")
dim(oxy_array)
#> [1] 253 166 324

# Get global attributes (metadata only; not used later in the visible script)
title <- ncatt_get(ncin,0,"title")
institution <- ncatt_get(ncin,0,"institution")
datasource <- ncatt_get(ncin,0,"source")
references <- ncatt_get(ncin,0,"references")
history <- ncatt_get(ncin,0,"history")
Conventions <- ncatt_get(ncin,0,"Conventions")

# Convert time: the units attribute is "days since 1950-01-01 00:00:00".
# Split the units string to recover the origin date.
tustr <- strsplit(tunits$value, " ")
tdstr <- strsplit(unlist(tustr)[3], "-")
tmonth <- as.integer(unlist(tdstr)[2])
tday <- as.integer(unlist(tdstr)[3])
tyear <- as.integer(unlist(tdstr)[1])

# Here I deviate from the guide a little bit. Save this info:
dates <- chron(time, origin = c(tmonth, tday, tyear))

# Extract calendar month and (4-digit) year for each time step.
# chron::month.day.year() returns full years directly, which replaces the
# previous substr() parsing of the printed chron string and its fragile
# two-digit-year windowing (ifelse(years > 90, 1900 + ., 2000 + .)).
# floor() drops the half-day fraction so each step maps to its calendar day.
mdy <- month.day.year(floor(time), origin. = c(month = tmonth, day = tday, year = tyear))
months <- mdy$month
years <- mdy$year

# Replace netCDF fill values with NA's
oxy_array[oxy_array == fillvalue$value] <- NA

# We only use Quarter 4 in this analysis, so now we want to loop through each time step,
# and if it is a good month save it as a raster.
# First get the index of months that correspond to Q4
months
#>   [1]  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1
#>  [26]  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2
#>  [51]  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3
#>  [76]  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4
#> [101]  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5
#> [126]  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6
#> [151]  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7
#> [176]  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8
#> [201]  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9
#> [226] 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10
#> [251] 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11
#> [276] 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12
#> [301]  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12

# Indices of the Q4 months (October-December)
index_keep <- which(months > 9)

# Keep only the Q4 slices of the oxygen array, plus matching month/year vectors
oxy_q4 <- oxy_array[, , index_keep]
months_keep <- months[index_keep]
years_keep <- years[index_keep]

# Average the three consecutive Q4 months into one annual mean layer.
# Slices come in year order, three per year, so slices i, i + 1, i + 2
# belong to the same year for i = 1, 4, 7, ...
loop_seq <- seq(1, dim(oxy_q4)[3], by = 3)

dlist <- list()

for (i in loop_seq) {
  oxy_10 <- oxy_q4[, , i]
  oxy_11 <- oxy_q4[, , i + 1]
  oxy_12 <- oxy_q4[, , i + 2]
  oxy_ave <- (oxy_10 + oxy_11 + oxy_12) / 3
  # (i + 2) / 3 maps i = 1, 4, 7, ... onto list positions 1, 2, 3, ...
  dlist[[(i + 2) / 3]] <- oxy_ave
}

# Name each annual layer with its year
names(dlist) <- unique(years_keep)

# Now I need to make a loop where I extract the raster value for each year...
# The cpue data is called dat so far in this script

# Filter years in the cpue data frame to only have the years I have oxygen for
# (names(dlist) holds years as character; %in% coerces year for the comparison)
d_sub_oxy <- dat %>% filter(year %in% names(dlist)) %>% droplevels()
#> filter: removed 31 rows (1%), 3,453 rows remaining

# Create data holding object (one element per year, filled in the loop below)
data_list <- list()

# ... And for the oxygen raster
raster_list <- list()

# Create factor year for indexing the list in the loop
d_sub_oxy$year_f <- as.factor(d_sub_oxy$year)

# Loop through each year and extract raster values for the cpue data points.
# For each year: build a raster from the annual Q4 oxygen layer, extract the
# value at each haul position, store the haul and raster data, and save a map.
for(i in unique(d_sub_oxy$year_f)) {
  
  # Set plot limits
  ymin = 54; ymax = 58; xmin = 12; xmax = 22

  # Subset a year (dlist is named by year; i is the year as character)
  oxy_slice <- dlist[[i]]
  
  # Create raster for that year (i); t() because the array is [lon, lat]
  r <- raster(t(oxy_slice), xmn = min(lon), xmx = max(lon), ymn = min(lat), ymx = max(lat),
              crs = CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs+ towgs84=0,0,0"))
  
  # Flip so the raster rows run north-to-south as raster expects
  r <- flip(r, direction = 'y')
  
  #plot(r, main = i)
  
  # Filter the same year (i) in the cpue data and select only coordinates
  d_slice <- d_sub_oxy %>% filter(year_f == i) %>% dplyr::select(lon, lat)
  
  # Make into a SpatialPoints object and extract the oxygen value at each haul
  data_sp <- SpatialPoints(d_slice)
  rasValue <- raster::extract(r, data_sp)
  
  # Add in the raster value and the year in the df holding the haul coordinates
  d_slice$oxy <- rasValue
  d_slice$year <- i
  
  # Index 1:n(years) for storing this year's results. Use the position of the
  # year among the factor levels instead of the previous hard-coded
  # `as.numeric(year) - 1992`, which silently breaks if the year range changes.
  index <- match(i, levels(d_sub_oxy$year_f))
  
  # Add each years' data in the list
  data_list[[index]] <- d_slice
  
  # Convert the raster to points so we can plot it with ggplot
  map.p <- rasterToPoints(r)
  
  # Make the points a dataframe for ggplot
  df_rast <- data.frame(map.p)
  
  # Rename y-variable and add year
  df_rast <- df_rast %>% rename("oxy" = "layer") %>% mutate(year = i)
  
  # Add each years' raster data frame in the list
  raster_list[[index]] <- df_rast
  
  # Make appropriate column headings
  colnames(df_rast) <- c("Longitude", "Latitude", "oxy")
  
  # Now make the map. Assign the plot and pass it to ggsave() explicitly:
  # inside a loop the plot object is never printed, so ggsave()'s default
  # plot = last_plot() could save a stale (previously displayed) plot.
  p <- ggplot(data = df_rast, aes(y = Latitude, x = Longitude)) +
    geom_raster(aes(fill = oxy)) +
    geom_point(data = d_slice, aes(x = lon, y = lat, fill = oxy),
               color = "black", size = 5, shape = 21) +
    theme_bw() +
    geom_sf(data = world, inherit.aes = F, size = 0.2) +
    coord_sf(xlim = c(xmin, xmax),
             ylim = c(ymin, ymax)) +
    scale_colour_gradientn(colours = rev(terrain.colors(10)),
                           limits = c(-200, 400)) +
    scale_fill_gradientn(colours = rev(terrain.colors(10)),
                         limits = c(-200, 400)) +
    NULL
  
  ggsave(paste("figures/supp/cpue_oxygen_rasters/", i,".png", sep = ""),
         plot = p,
         width = 6.5, height = 6.5, dpi = 600)
  
}
#> filter: removed 3,392 rows (98%), 61 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,391 rows (98%), 62 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,400 rows (98%), 53 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,392 rows (98%), 61 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,378 rows (98%), 75 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,385 rows (98%), 68 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,359 rows (97%), 94 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,364 rows (97%), 89 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,338 rows (97%), 115 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,336 rows (97%), 117 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,350 rows (97%), 103 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,368 rows (98%), 85 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,332 rows (96%), 121 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,303 rows (96%), 150 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,291 rows (95%), 162 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,277 rows (95%), 176 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,297 rows (95%), 156 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,291 rows (95%), 162 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,273 rows (95%), 180 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,320 rows (96%), 133 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,305 rows (96%), 148 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,277 rows (95%), 176 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,287 rows (95%), 166 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,240 rows (94%), 213 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,228 rows (93%), 225 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,243 rows (94%), 210 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,361 rows (97%), 92 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

# Now create a data frame from the list of all annual values
big_dat_oxy <- dplyr::bind_rows(data_list)
big_raster_dat_oxy <- dplyr::bind_rows(raster_list)

# Plot data, looks like there's big inter-annual variation but a negative trend over time
annual_mean_oxy <- big_raster_dat_oxy %>%
  drop_na(oxy) %>%
  group_by(year) %>%
  summarise(mean_oxy = mean(oxy)) %>%
  mutate(year_num = as.numeric(year))

ggplot(annual_mean_oxy, aes(x = year_num, y = mean_oxy)) +
  geom_point(size = 2) +
  stat_smooth(method = "lm")
#> group_by: one grouping variable (year)
#> drop_na (grouped): no rows removed
#> summarise: now 27 rows and 2 columns, ungrouped
#> mutate: new variable 'year_num' (double) with 27 unique values and 0% NA
#> `geom_smooth()` using formula 'y ~ x'


big_raster_dat_oxy %>%
  group_by(year) %>%
  drop_na(oxy) %>%
  # Flag grid cells with negative modelled bottom oxygen ("dead" cells)
  mutate(dead = ifelse(oxy < 0, "Y", "N")) %>%
  filter(dead == "Y") %>%
  # n = number of hypoxic cells per year. Note this is a grouped mutate, so
  # every remaining row in a year carries the same n and the points overplot;
  # summarise()/distinct() would give one row per year instead
  mutate(n = n(),
         year_num = as.numeric(year)) %>%
  ggplot(., aes(year_num, n)) +
  geom_point(size = 2) +
  stat_smooth(method = "lm") +
  NULL
#> group_by: one grouping variable (year)
#> drop_na (grouped): no rows removed
#> mutate (grouped): new variable 'dead' (character) with 2 unique values and 0% NA
#> filter (grouped): removed 437,238 rows (93%), 31,671 rows remaining
#> mutate (grouped): new variable 'n' (integer) with 27 unique values and 0% NA
#>                   new variable 'year_num' (double) with 27 unique values and 0% NA
#> `geom_smooth()` using formula 'y ~ x'


# Now add in the new oxygen column in the original data:
str(d_sub_oxy)
#> tibble [3,453 × 12] (S3: tbl_df/tbl/data.frame)
#>  $ density    : num [1:3453] 71 144.28 850.94 19.18 4.24 ...
#>  $ year       : int [1:3453] 1993 1993 1993 1993 1993 1993 1993 1993 1993 1993 ...
#>  $ lat        : num [1:3453] 57.5 57.5 54.7 57.1 57.8 ...
#>  $ lon        : num [1:3453] 17.1 17.6 13.7 18.9 19.5 ...
#>  $ quarter    : int [1:3453] 4 4 4 4 4 4 4 4 4 4 ...
#>  $ haul.id    : chr [1:3453] "1993:4:SWE:ARG:FOT:262:31" "1993:4:SWE:ARG:FOT:263:32" "1993:4:GFR:SOL:H20:34:60" "1993:4:SWE:ARG:FOT:256:25" ...
#>  $ IDx        : chr [1:3453] "1993.4.SWE.77AR.FOT.262.31" "1993.4.SWE.77AR.FOT.263.32" "1993.4.GFR.06S1.H20.34.60" "1993.4.SWE.77AR.FOT.256.25" ...
#>  $ ices_rect  : chr [1:3453] "43G7" "43G7" "38G3" "43G8" ...
#>  $ SD         : chr [1:3453] "27" "27" "24" "28" ...
#>  $ density_fle: num [1:3453] 172.6 12.8 226.1 25.4 46 ...
#>  $ depth      : num [1:3453] 73 88 20 75 86 10 8 7 32 45 ...
#>  $ year_f     : Factor w/ 27 levels "1993","1994",..: 1 1 1 1 1 1 1 1 1 1 ...
str(big_dat_oxy)
#> tibble [3,453 × 4] (S3: tbl_df/tbl/data.frame)
#>  $ lon : num [1:3453] 17.1 17.6 13.7 18.9 19.5 ...
#>  $ lat : num [1:3453] 57.5 57.5 54.7 57.1 57.8 ...
#>  $ oxy : num [1:3453] 340 276 302 328 333 ...
#>  $ year: chr [1:3453] "1993" "1993" "1993" "1993" ...

# Create an ID for matching the oxygen data with the cpue data
# (year + exact lon/lat pasted into a string key; this relies on identical
# numeric-to-character formatting on both sides — works here because
# big_dat_oxy's coordinates were taken directly from dat)
dat$id_oxy <- paste(dat$year, dat$lon, dat$lat, sep = "_")
big_dat_oxy$id_oxy <- paste(big_dat_oxy$year, big_dat_oxy$lon, big_dat_oxy$lat, sep = "_")

# Which id's in the cpue data (dat) have no oxygen match?
# (all 1992 hauls, since the oxygen reanalysis starts in 1993 — see below)
ids <- dat$id_oxy[!dat$id_oxy %in% c(big_dat_oxy$id_oxy)]

unique(ids)
#>  [1] "1992_13.45_55.0167"   "1992_13.1_54.9167"    "1992_13.9667_54.9333"
#>  [4] "1992_14.35_55.1"      "1992_14.1667_55.1333" "1992_13.9_54.5"      
#>  [7] "1992_14.2_54.5"       "1992_14.2833_55.1833" "1992_13.8_54.5167"   
#> [10] "1992_14.3667_54.5167" "1992_13.8833_54.5667" "1992_14.1333_54.5667"
#> [13] "1992_13.8_54.6333"    "1992_14.2167_54.6333" "1992_13.1167_54.65"  
#> [16] "1992_13.65_54.6833"   "1992_13.3333_54.7"    "1992_14.1_54.7"      
#> [19] "1992_13.1167_54.7167" "1992_13.55_54.7333"   "1992_13.7_54.75"     
#> [22] "1992_13.8167_54.75"   "1992_13.4167_54.7833" "1992_13.2_54.8667"   
#> [25] "1992_14.0333_54.8667" "1992_13.6333_55"      "1992_13.7667_55.1"   
#> [28] "1992_13.55_55.0333"   "1992_13.1167_54.9833" "1992_14.3_55.0333"   
#> [31] "1992_13.8667_55.1"

# Select only the columns we want to merge (key + oxygen value)
big_dat_sub_oxy <- big_dat_oxy %>% dplyr::select(id_oxy, oxy)

# Remove duplicate ID (one oxy value per id); distinct() keeps the first
# occurrence of each key
big_dat_sub_oxy2 <- big_dat_sub_oxy %>% distinct(id_oxy, .keep_all = TRUE)
#> distinct: removed 9 rows (<1%), 3,444 rows remaining
# big_dat_sub_oxy %>% group_by(id_oxy) %>% mutate(n = n()) %>% arrange(desc(n))

# Join the data with raster-derived oxygen with the full cpue data
# (the 31 unmatched 1992 rows get NA oxygen here)
dat <- left_join(dat, big_dat_sub_oxy2, by = "id_oxy")
#> left_join: added one column (oxy)
#>            > rows only in x      31
#>            > rows only in y  (    0)
#>            > matched rows     3,453
#>            >                 =======
#>            > rows total       3,484

Temperature

# Open the netCDF file with monthly mean sea-floor temperature
# (CMEMS Baltic Sea NEMO reanalysis; see the printed header below)
ncin <- nc_open("data/NEMO_Nordic_SCOBI/dataset-reanalysis-nemo-monthlymeans_1608127623694.nc")

# Print the file header (variables, dimensions, global attributes)
print(ncin)
#> File data/NEMO_Nordic_SCOBI/dataset-reanalysis-nemo-monthlymeans_1608127623694.nc (NC_FORMAT_CLASSIC):
#> 
#>      1 variables (excluding dimension variables):
#>         float bottomT[longitude,latitude,time]   
#>             standard_name: sea_water_potential_temperature_at_sea_floor
#>             units: degrees_C
#>             long_name: Sea floor potential temperature
#>             missing_value: NaN
#>             _FillValue: NaN
#>             _ChunkSizes: 1
#>              _ChunkSizes: 523
#>              _ChunkSizes: 383
#> 
#>      3 dimensions:
#>         time  Size:324
#>             axis: T
#>             long_name: Validity time
#>             standard_name: time
#>             units: days since 1950-01-01 00:00:00
#>             calendar: gregorian
#>             _ChunkSizes: 512
#>             _CoordinateAxisType: Time
#>             valid_min: 15721.5
#>             valid_max: 25551.5
#>         latitude  Size:523
#>             axis: Y
#>             standard_name: latitude
#>             long_name: latitude
#>             units: degrees_north
#>             _CoordinateAxisType: Lat
#>             valid_min: 48.49169921875
#>             valid_max: 65.8914184570312
#>         longitude  Size:383
#>             standard_name: longitude
#>             long_name: longitude
#>             units: degrees_east
#>             axis: X
#>             _CoordinateAxisType: Lon
#>             valid_min: 9.01375484466553
#>             valid_max: 30.2357654571533
#> 
#>     24 global attributes:
#>         references: http://www.smhi.se
#>         institution: Swedish Meterological and Hydrological Institute
#>         history: See source and creation_date attributees
#>         Conventions: CF-1.5
#>         contact: servicedesk_cmems@mercator-ocean.eu
#>         comment: Provided by SMHI as a Copernicus Marine Environment Monitoring Service production unit
#>         bullentin_type: reanalysis
#>         cmems_product_id: BALTICSEA_REANALYSIS_PHY_003_011
#>         title: CMEMS V4 Reanalysis: NEMO model 3D fields (monthly means)
#>         FROM_ORIGINAL_FILE__easternmost_longitude: 30.2357654571533
#>         FROM_ORIGINAL_FILE__northernmost_latitude: 65.8914184570312
#>         FROM_ORIGINAL_FILE__westernmost_longitude: 9.01375484466553
#>         FROM_ORIGINAL_FILE__southernmost_latitude: 48.49169921875
#>         shallowest_depth: 1.50136542320251
#>         deepest_depth: 711.059204101562
#>         source: SMHI reanalysis run NORDIC-NS2_1d_20191201_20191202
#>         file_quality_index: 1
#>         creation_date: 2020-11-16 UTC
#>         bullentin_date: 20191201
#>         start_date: 2019-12-01 UTC
#>         stop_date: 2019-12-01 UTC
#>         start_time: 00:00 UTC
#>         stop_time: 00:00 UTC
#>         _CoordSysBuilder: ucar.nc2.dataset.conv.CF1Convention

# Get longitude and latitude vectors of the model grid
lon <- ncvar_get(ncin,"longitude")
nlon <- dim(lon)
head(lon)
#> [1] 9.013755 9.069310 9.124865 9.180420 9.235975 9.291530

lat <- ncvar_get(ncin,"latitude")
nlat <- dim(lat)
head(lat)
#> [1] 48.49170 48.52503 48.55836 48.59170 48.62503 48.65836

# Get time (unit is "days since 1950-01-01", one value per monthly mean)
time <- ncvar_get(ncin,"time")
time
#>   [1] 15721.5 15751.0 15780.5 15811.0 15841.5 15872.0 15902.5 15933.5 15964.0
#>  [10] 15994.5 16025.0 16055.5 16086.5 16116.0 16145.5 16176.0 16206.5 16237.0
#>  [19] 16267.5 16298.5 16329.0 16359.5 16390.0 16420.5 16451.5 16481.0 16510.5
#>  [28] 16541.0 16571.5 16602.0 16632.5 16663.5 16694.0 16724.5 16755.0 16785.5
#>  [37] 16816.5 16846.5 16876.5 16907.0 16937.5 16968.0 16998.5 17029.5 17060.0
#>  [46] 17090.5 17121.0 17151.5 17182.5 17212.0 17241.5 17272.0 17302.5 17333.0
#>  [55] 17363.5 17394.5 17425.0 17455.5 17486.0 17516.5 17547.5 17577.0 17606.5
#>  [64] 17637.0 17667.5 17698.0 17728.5 17759.5 17790.0 17820.5 17851.0 17881.5
#>  [73] 17912.5 17942.0 17971.5 18002.0 18032.5 18063.0 18093.5 18124.5 18155.0
#>  [82] 18185.5 18216.0 18246.5 18277.5 18307.5 18337.5 18368.0 18398.5 18429.0
#>  [91] 18459.5 18490.5 18521.0 18551.5 18582.0 18612.5 18643.5 18673.0 18702.5
#> [100] 18733.0 18763.5 18794.0 18824.5 18855.5 18886.0 18916.5 18947.0 18977.5
#> [109] 19008.5 19038.0 19067.5 19098.0 19128.5 19159.0 19189.5 19220.5 19251.0
#> [118] 19281.5 19312.0 19342.5 19373.5 19403.0 19432.5 19463.0 19493.5 19524.0
#> [127] 19554.5 19585.5 19616.0 19646.5 19677.0 19707.5 19738.5 19768.5 19798.5
#> [136] 19829.0 19859.5 19890.0 19920.5 19951.5 19982.0 20012.5 20043.0 20073.5
#> [145] 20104.5 20134.0 20163.5 20194.0 20224.5 20255.0 20285.5 20316.5 20347.0
#> [154] 20377.5 20408.0 20438.5 20469.5 20499.0 20528.5 20559.0 20589.5 20620.0
#> [163] 20650.5 20681.5 20712.0 20742.5 20773.0 20803.5 20834.5 20864.0 20893.5
#> [172] 20924.0 20954.5 20985.0 21015.5 21046.5 21077.0 21107.5 21138.0 21168.5
#> [181] 21199.5 21229.5 21259.5 21290.0 21320.5 21351.0 21381.5 21412.5 21443.0
#> [190] 21473.5 21504.0 21534.5 21565.5 21595.0 21624.5 21655.0 21685.5 21716.0
#> [199] 21746.5 21777.5 21808.0 21838.5 21869.0 21899.5 21930.5 21960.0 21989.5
#> [208] 22020.0 22050.5 22081.0 22111.5 22142.5 22173.0 22203.5 22234.0 22264.5
#> [217] 22295.5 22325.0 22354.5 22385.0 22415.5 22446.0 22476.5 22507.5 22538.0
#> [226] 22568.5 22599.0 22629.5 22660.5 22690.5 22720.5 22751.0 22781.5 22812.0
#> [235] 22842.5 22873.5 22904.0 22934.5 22965.0 22995.5 23026.5 23056.0 23085.5
#> [244] 23116.0 23146.5 23177.0 23207.5 23238.5 23269.0 23299.5 23330.0 23360.5
#> [253] 23391.5 23421.0 23450.5 23481.0 23511.5 23542.0 23572.5 23603.5 23634.0
#> [262] 23664.5 23695.0 23725.5 23756.5 23786.0 23815.5 23846.0 23876.5 23907.0
#> [271] 23937.5 23968.5 23999.0 24029.5 24060.0 24090.5 24121.5 24151.5 24181.5
#> [280] 24212.0 24242.5 24273.0 24303.5 24334.5 24365.0 24395.5 24426.0 24456.5
#> [289] 24487.5 24517.0 24546.5 24577.0 24607.5 24638.0 24668.5 24699.5 24730.0
#> [298] 24760.5 24791.0 24821.5 24852.5 24882.0 24911.5 24942.0 24972.5 25003.0
#> [307] 25033.5 25064.5 25095.0 25125.5 25156.0 25186.5 25217.5 25247.0 25276.5
#> [316] 25307.0 25337.5 25368.0 25398.5 25429.5 25460.0 25490.5 25521.0 25551.5

# Time units attribute ("days since 1950-01-01 00:00:00") and number of steps
tunits <- ncatt_get(ncin,"time","units")
nt <- dim(time)
nt
#> [1] 324
tunits
#> $hasatt
#> [1] TRUE
#> 
#> $value
#> [1] "days since 1950-01-01 00:00:00"

# Get temperature (variable "bottomT": sea floor potential temperature, deg C)
dname <- "bottomT"

# Read the full lon x lat x time array plus its attributes
temp_array <- ncvar_get(ncin,dname)
dlname <- ncatt_get(ncin,dname,"long_name")
dunits <- ncatt_get(ncin,dname,"units")
fillvalue <- ncatt_get(ncin,dname,"_FillValue")
dim(temp_array)
#> [1] 383 523 324

# Get global attributes (kept for reference; not used further in the
# visible part of this script)
title <- ncatt_get(ncin,0,"title")
institution <- ncatt_get(ncin,0,"institution")
datasource <- ncatt_get(ncin,0,"source")
references <- ncatt_get(ncin,0,"references")
history <- ncatt_get(ncin,0,"history")
Conventions <- ncatt_get(ncin,0,"Conventions")

# Convert time: split the time units string into fields
# ("days since 1950-01-01 00:00:00" -> origin year/month/day for chron)
tustr <- strsplit(tunits$value, " ")
tdstr <- strsplit(unlist(tustr)[3], "-")
tmonth <- as.integer(unlist(tdstr)[2])
tday <- as.integer(unlist(tdstr)[3])
tyear <- as.integer(unlist(tdstr)[1])

# Here I deviate from the guide a little bit. Save this info:
# chron dates from days-since-origin plus the parsed origin
dates <- chron(time, origin = c(tmonth, tday, tyear))

# Crop the date variable: chron prints as "(mm/dd/yy)", so characters 2-3
# hold the month and 8-9 the two-digit year
months <- as.numeric(substr(dates, 2, 3))
years <- as.numeric(substr(dates, 8, 9))
# Two-digit-year pivot: > 90 -> 1900s, otherwise 2000s. Safe here because
# the reanalysis spans 1993-2019 (valid_min/valid_max in the header above)
years <- ifelse(years > 90, 1900 + years, 2000 + years)

# Replace netCDF fill values with NA's.
# NOTE: '==' can never match a NaN fill value (NaN == NaN evaluates to NA,
# and NA logical indices are skipped in a length-one replacement), and this
# file declares "_FillValue: NaN" — so the plain comparison replaced nothing.
# Use is.nan() in that case so land/missing model cells really become NA.
if (is.nan(fillvalue$value)) {
  temp_array[is.nan(temp_array)] <- NA
} else {
  temp_array[temp_array == fillvalue$value] <- NA
}

# We only use Quarter 4 in this analysis, so now we want to loop through each time step,
# and if it is a good month save it as a raster.
# First get the index of months that correspond to Q4 (Oct-Dec);
# print the month sequence to confirm the 12-month annual cycle
months
#>   [1]  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1
#>  [26]  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2
#>  [51]  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3
#>  [76]  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4
#> [101]  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5
#> [126]  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6
#> [151]  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7
#> [176]  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8
#> [201]  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9
#> [226] 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10
#> [251] 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11
#> [276] 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12
#> [301]  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12

# Indices of the Q4 months (10, 11, 12)
index_keep <- which(months > 9)

# Quarter 4 by keeping months in index_keep (subset along the time dimension)
temp_q4 <- temp_array[, , index_keep]

# Month number (10/11/12) for each kept slice
months_keep <- months[index_keep]

# Year for each kept slice (three consecutive entries per year)
years_keep <- years[index_keep]

# Now we have an array with only Q4 data; average the three Q4 months
# (Oct, Nov, Dec) within each year.
# Every third slice along the time dimension starts a new year.
loop_seq <- seq(1, dim(temp_q4)[3], by = 3)

# Create objects that will hold data
dlist <- list()
temp_10 <- c()
temp_11 <- c()
temp_12 <- c()
temp_ave <- c()

# Walk over the yearly start positions and average the three consecutive
# monthly slices, storing one mean Q4 field per year.
for (yr_idx in seq_along(loop_seq)) {

  start <- loop_seq[yr_idx]

  temp_10 <- temp_q4[, , start]
  temp_11 <- temp_q4[, , start + 1]
  temp_12 <- temp_q4[, , start + 2]

  temp_ave <- (temp_10 + temp_11 + temp_12) / 3

  dlist[[yr_idx]] <- temp_ave

}

# Now name the lists with the year:
names(dlist) <- unique(years_keep)

# Now I need to make a loop where I extract the raster value for each year...
# The cpue data is called dat so far in this script

# Filter years in the cpue data frame to only have the years I have temperature for
d_sub_temp <- dat %>% filter(year %in% names(dlist)) %>% droplevels()
#> filter: no rows removed

# Create data holding object (one element per year with extracted temperatures)
data_list <- list()

# ... And for the temperature raster (one data frame per year, for plotting)
raster_list <- list()

# Create factor year for indexing the list in the loop
d_sub_temp$year_f <- as.factor(d_sub_temp$year)

# Loop through each year and extract raster values for the cpue data points
for(i in unique(d_sub_temp$year_f)) {
  
  # Subset a year (the averaged Q4 temperature matrix for year i)
  temp_slice <- dlist[[i]]
  
  # Create raster for that year (i); the array is [lon, lat], so transpose
  # to raster's [row, col] orientation
  r <- raster(t(temp_slice), xmn = min(lon), xmx = max(lon), ymn = min(lat), ymx = max(lat),
              crs = CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs+ towgs84=0,0,0"))
  
  # Flip... (the transposed matrix has latitude running the wrong way;
  # flipping on y restores north-up orientation)
  r <- flip(r, direction = 'y')
  
  #plot(r, main = i)
  
  # Filter the same year (i) in the cpue data and select only coordinates
  d_slice <- d_sub_temp %>% filter(year_f == i) %>% dplyr::select(lon, lat)
  
  # Make into a SpatialPoints object
  data_sp <- SpatialPoints(d_slice)
  
  # Extract raster value (temperature) at each haul position
  rasValue <- raster::extract(r, data_sp)
  
  # Now we want to plot the results of the raster extractions by plotting the cpue
  # data points over a raster and saving it for each year.
  # Make the SpatialPoints object into a raster again (for pl)
  # NOTE(review): 'df' is created but never used below — candidate for removal
  df <- as.data.frame(data_sp)
  
  # Add in the raster value in the df holding the coordinates for the cpue data
  d_slice$temp <- rasValue
  
  # Add in which year
  d_slice$year <- i
  
  # Create a index for the data last where we store all years (because our loop index
  # i is not continuous, we can't use it directly)
  # NOTE(review): hard-codes 1992, i.e. assumes the first year is 1993 —
  # must be updated if the survey period changes
  index <- as.numeric(d_slice$year)[1] - 1992
  
  # Add each years' data in the list
  data_list[[index]] <- d_slice
  
  # Save to check each year is ok! First convert the raster to points for plotting
  # (so that we can use ggplot)
  map.p <- rasterToPoints(r)
  
  # Make the points a dataframe for ggplot
  df_rast <- data.frame(map.p)
  
  # Rename y-variable and add year
  df_rast <- df_rast %>% rename("temp" = "layer") %>% mutate(year = i)
  
  # Add each years' raster data frame in the list
  # (stored BEFORE the colnames() change below, so list entries keep the
  # x/y/temp/year names from the rename above)
  raster_list[[index]] <- df_rast
  
  # Make appropriate column headings
  # NOTE(review): only three names are assigned to a four-column data frame,
  # so the 'year' column loses its name — harmless for the plot below
  colnames(df_rast) <- c("Longitude", "Latitude", "temp")
  
  # Now make the map: temperature raster with haul points overlaid,
  # both on the same fill scale so extraction errors stand out
  ggplot(data = df_rast, aes(y = Latitude, x = Longitude)) +
    geom_raster(aes(fill = temp)) +
    geom_point(data = d_slice, aes(x = lon, y = lat, fill = temp),
               color = "black", size = 5, shape = 21) +
    theme_bw() +
    geom_sf(data = world, inherit.aes = F, size = 0.2) +
    coord_sf(xlim = c(min(dat$lon), max(dat$lon)),
             ylim = c(min(dat$lat), max(dat$lat))) +
    scale_colour_gradientn(colours = rev(terrain.colors(10)),
                           limits = c(2, 17)) +
    scale_fill_gradientn(colours = rev(terrain.colors(10)),
                         limits = c(2, 17)) +
    NULL
  
  # Side effect: writes one check-plot per year to figures/supp/
  ggsave(paste("figures/supp/cpue_temp_rasters/", i,".png", sep = ""),
         width = 6.5, height = 6.5, dpi = 600)
  
}
#> filter: removed 3,387 rows (98%), 60 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,385 rows (98%), 62 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,394 rows (98%), 53 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,386 rows (98%), 61 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,372 rows (98%), 75 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,379 rows (98%), 68 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,353 rows (97%), 94 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,358 rows (97%), 89 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,332 rows (97%), 115 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,330 rows (97%), 117 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,344 rows (97%), 103 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,362 rows (98%), 85 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,326 rows (96%), 121 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,297 rows (96%), 150 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,285 rows (95%), 162 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,272 rows (95%), 175 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,293 rows (96%), 154 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,286 rows (95%), 161 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,267 rows (95%), 180 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,315 rows (96%), 132 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,299 rows (96%), 148 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,271 rows (95%), 176 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,281 rows (95%), 166 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,234 rows (94%), 213 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,222 rows (93%), 225 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,237 rows (94%), 210 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,355 rows (97%), 92 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

# Now create a data frame from the list of all annual values
big_dat_temp <- dplyr::bind_rows(data_list)
big_raster_dat_temp <- dplyr::bind_rows(raster_list)

# Sanity check: extracted haul temperatures fall in a plausible Q4 range
big_dat_temp %>% drop_na(temp) %>% summarise(max = max(temp))
#> drop_na: no rows removed
#> summarise: now one row and one column, ungrouped
#> # A tibble: 1 x 1
#>     max
#>   <dbl>
#> 1  14.5
big_dat_temp %>% drop_na(temp) %>% summarise(min = min(temp))
#> drop_na: no rows removed
#> summarise: now one row and one column, ungrouped
#> # A tibble: 1 x 1
#>     min
#>   <dbl>
#> 1  3.53

# Plot data, looks like there's big inter-annual variation but a positive trend
big_raster_dat_temp %>%
  group_by(year) %>%
  drop_na(temp) %>%
  summarise(mean_temp = mean(temp)) %>%
  mutate(year_num = as.numeric(year)) %>%
  ggplot(., aes(year_num, mean_temp)) +
  geom_point(size = 2) +
  stat_smooth(method = "lm") +
  NULL
#> group_by: one grouping variable (year)
#> drop_na (grouped): no rows removed
#> summarise: now 27 rows and 2 columns, ungrouped
#> mutate: new variable 'year_num' (double) with 27 unique values and 0% NA
#> `geom_smooth()` using formula 'y ~ x'


# Now add in the new temperature column in the original data:
# (inspect both data sets before building the join key)
str(d_sub_temp)
#> tibble [3,447 × 14] (S3: tbl_df/tbl/data.frame)
#>  $ density    : num [1:3447] 71 144.28 850.94 19.18 4.24 ...
#>  $ year       : int [1:3447] 1993 1993 1993 1993 1993 1993 1993 1993 1993 1993 ...
#>  $ lat        : num [1:3447] 57.5 57.5 54.7 57.1 57.8 ...
#>  $ lon        : num [1:3447] 17.1 17.6 13.7 18.9 19.5 ...
#>  $ quarter    : int [1:3447] 4 4 4 4 4 4 4 4 4 4 ...
#>  $ haul.id    : chr [1:3447] "1993:4:SWE:ARG:FOT:262:31" "1993:4:SWE:ARG:FOT:263:32" "1993:4:GFR:SOL:H20:34:60" "1993:4:SWE:ARG:FOT:256:25" ...
#>  $ IDx        : chr [1:3447] "1993.4.SWE.77AR.FOT.262.31" "1993.4.SWE.77AR.FOT.263.32" "1993.4.GFR.06S1.H20.34.60" "1993.4.SWE.77AR.FOT.256.25" ...
#>  $ ices_rect  : chr [1:3447] "43G7" "43G7" "38G3" "43G8" ...
#>  $ SD         : chr [1:3447] "27" "27" "24" "28" ...
#>  $ density_fle: num [1:3447] 172.6 12.8 226.1 25.4 46 ...
#>  $ depth      : num [1:3447] 73 88 20 75 86 10 8 7 32 45 ...
#>  $ id_oxy     : chr [1:3447] "1993_17.0833_57.4716" "1993_17.555_57.4767" "1993_13.65_54.6833" "1993_18.8633_57.0817" ...
#>  $ oxy        : num [1:3447] 7.62 6.18 6.75 7.34 7.45 ...
#>  $ year_f     : Factor w/ 27 levels "1993","1994",..: 1 1 1 1 1 1 1 1 1 1 ...
# big_dat_temp holds one extracted temperature per haul position and year
str(big_dat_temp)
#> tibble [3,447 × 4] (S3: tbl_df/tbl/data.frame)
#>  $ lon : num [1:3447] 17.1 17.6 13.7 18.9 19.5 ...
#>  $ lat : num [1:3447] 57.5 57.5 54.7 57.1 57.8 ...
#>  $ temp: num [1:3447] 5.71 4.27 8.15 4.1 4.23 ...
#>  $ year: chr [1:3447] "1993" "1993" "1993" "1993" ...

# Create an ID for matching the temperature data with the cpue data
# (same "year_lon_lat" key construction as for oxygen above)
dat$id_temp <- paste(dat$year, dat$lon, dat$lat, sep = "_")
big_dat_temp$id_temp <- paste(big_dat_temp$year, big_dat_temp$lon, big_dat_temp$lat, sep = "_")

# Which id's are not in the cpue data (dat)? (It's because I don't have those years, not about the location)
ids <- dat$id_temp[!dat$id_temp %in% c(big_dat_temp$id_temp)]

# Empty result: every cpue haul has a matching temperature value
unique(ids)
#> character(0)

# Select only the columns we want to merge (key + temperature value)
big_dat_sub_temp <- big_dat_temp %>% dplyr::select(id_temp, temp)

# Remove duplicate ID (one temp value per id); distinct() keeps the first
big_dat_sub_temp2 <- big_dat_sub_temp %>% distinct(id_temp, .keep_all = TRUE)
#> distinct: removed 9 rows (<1%), 3,438 rows remaining

# Join the data with raster-derived temperature with the full cpue data
dat <- left_join(dat, big_dat_sub_temp2, by = "id_temp")
#> left_join: added one column (temp)
#>            > rows only in x       0
#>            > rows only in y  (    0)
#>            > matched rows     3,447
#>            >                 =======
#>            > rows total       3,447

colnames(dat)
#>  [1] "density"     "year"        "lat"         "lon"         "quarter"    
#>  [6] "haul.id"     "IDx"         "ices_rect"   "SD"          "density_fle"
#> [11] "depth"       "id_oxy"      "oxy"         "id_temp"     "temp"

# The join keys are no longer needed
dat <- dat %>% dplyr::select(-id_temp, -id_oxy)

# Drop NA temp (none here; all hauls matched)
dat <- dat %>% drop_na(temp)
#> drop_na: no rows removed

Add UTM coords

# First add UTM coords
# Add UTM coords
# Function
# Convert vectors of longitude/latitude (WGS84, decimal degrees) to UTM.
#
# x:    numeric vector of longitudes (degrees east)
# y:    numeric vector of latitudes (degrees north)
# zone: UTM zone number (e.g. 33 for the Baltic Proper)
#
# Returns a data.frame with columns ID, X and Y (easting/northing in meters).
LongLatToUTM <- function(x, y, zone){
  # seq_along() is safe for zero-length input (1:length(x) is not)
  xy <- data.frame(ID = seq_along(x), X = x, Y = y)
  coordinates(xy) <- c("X", "Y")
  proj4string(xy) <- CRS("+proj=longlat +datum=WGS84")  ## for example
  # Fix: the original target string contained " ellps=WGS84" (missing '+'),
  # a token PROJ silently ignores; use a proper '+datum=WGS84' tag instead.
  res <- spTransform(xy, CRS(paste0("+proj=utm +zone=", zone, " +datum=WGS84")))
  return(as.data.frame(res))
}

# Convert haul coordinates to UTM zone 33 (meters)
utm_coords <- LongLatToUTM(dat$lon, dat$lat, zone = 33)
#> Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj =
#> prefer_proj): Discarded datum Unknown based on WGS84 ellipsoid in CRS definition
# Work in km rather than m so coordinates are well scaled for model fitting
dat$X <- utm_coords$X/1000 # for computational reasons
dat$Y <- utm_coords$Y/1000 # for computational reasons

Make Gifs

Save data

# Save the final haul-level data set (densities, oxygen, temperature, depth, UTM coords)
write.csv(dat, file = "data/for_analysis/mdat_cpue.csv", row.names = FALSE)